RED-3992 - Replaced naive approach with Aho-Corasick string search. Cleaned up code. Reduced amount of unnecessary conversions/invocations.

This commit is contained in:
Timo Bejan 2022-05-06 10:28:12 +03:00
parent cfd9d7665b
commit 36967cab2c
30 changed files with 41585 additions and 216 deletions

View File

@ -5,7 +5,7 @@
<parent>
<groupId>com.iqser.red</groupId>
<artifactId>platform-docker-dependency</artifactId>
<version>1.1.0</version>
<version>1.2.0</version>
<relativePath />
</parent>
<modelVersion>4.0.0</modelVersion>
@ -42,7 +42,7 @@
<artifactId>docker-maven-plugin</artifactId>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
@ -95,4 +95,4 @@
</plugins>
</pluginManagement>
</build>
</project>
</project>

View File

@ -24,6 +24,19 @@
<groupId>com.iqser.red.commons</groupId>
<artifactId>storage-commons</artifactId>
</dependency>
<dependency>
<groupId>org.ahocorasick</groupId>
<artifactId>ahocorasick</artifactId>
<version>0.6.3</version>
</dependency>
<dependency>
<groupId>org.openjdk.jol</groupId>
<artifactId>jol-core</artifactId>
<version>0.10</version>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>redaction-service-api-v1</artifactId>

View File

@ -54,16 +54,6 @@ public class Dictionary {
}
/**
 * Checks whether {@code value} is known for the dictionary {@code type}.
 * Both the non-local values ({@code getValues(false)}) and the local values
 * ({@code getValues(true)}) of the model are consulted.
 *
 * @param type  key into {@code localAccessMap}
 * @param value value to look for
 * @return {@code true} if the type is registered and one of its value sets contains {@code value}
 */
public boolean containsValue(String type, String value) {

    if (!localAccessMap.containsKey(type)) {
        return false;
    }
    DictionaryModel model = localAccessMap.get(type);
    return model.getValues(false).contains(value) || model.getValues(true).contains(value);
}
public boolean isHint(String type) {
DictionaryModel model = localAccessMap.get(type);

View File

@ -23,4 +23,4 @@ public class DictionaryEntries {
@Builder.Default
Set<DictionaryEntry> falseRecommendations = new HashSet<>();
}
}

View File

@ -4,8 +4,10 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import lombok.AllArgsConstructor;
import lombok.Data;
import org.ahocorasick.trie.Trie;
import java.io.Serializable;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
@ -14,30 +16,101 @@ import java.util.stream.Collectors;
@AllArgsConstructor
public class DictionaryModel implements Serializable {
private String type;
private int rank;
private float[] color;
private boolean caseInsensitive;
private boolean hint;
private Set<DictionaryEntry> entries;
private Set<DictionaryEntry> falsePositives;
private Set<DictionaryEntry> falseRecommendations;
private Set<String> localEntries;
private boolean isDossierDictionary;
private final String type;
private final int rank;
private final float[] color;
private final boolean caseInsensitive;
private final boolean hint;
private final boolean isDossierDictionary;
private final Set<DictionaryEntry> entries;
private final Set<DictionaryEntry> falsePositives;
private final Set<DictionaryEntry> falseRecommendations;
private transient Trie entriesTrie;
private transient Trie falsePositivesTrie;
private transient Trie falseRecommendationsTrie;
private transient Trie localEntriesTrie;
private final Set<String> localEntries = new HashSet<>();
public DictionaryModel(String type,
int rank,
float[] color,
boolean caseInsensitive,
boolean hint,
Set<DictionaryEntry> entries,
Set<DictionaryEntry> falsePositives,
Set<DictionaryEntry> falseRecommendations,
boolean isDossierDictionary) {
this.type = type;
this.rank = rank;
this.color = color;
this.caseInsensitive = caseInsensitive;
this.hint = hint;
this.isDossierDictionary = isDossierDictionary;
this.entries = entries;
this.falsePositives = falsePositives;
this.falseRecommendations = falseRecommendations;
this.entriesTrie = buildTrie(entries);
this.falsePositivesTrie = buildTrie(falsePositives);
this.falseRecommendationsTrie = buildTrie(falseRecommendations);
/**
 * Returns the values of this dictionary.
 *
 * @param local {@code true} to return the {@code localEntries} set itself (no copy),
 *              {@code false} to return a freshly collected set of the values of all
 *              entries that are not marked as deleted
 * @return the selected set of values
 */
public Set<String> getValues(boolean local) {

    if (local) {
        return localEntries;
    }
    return entries.stream()
            .filter(entry -> !entry.isDeleted())
            .map(entry -> entry.getValue())
            .collect(Collectors.toSet());
}
public Set<String> getFalsePositiveValues() {
return falsePositives.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors
.toSet());
/**
 * Returns the search trie for {@code localEntries}, building it on first access.
 *
 * NOTE(review): once built, the trie is cached; values added to {@code localEntries}
 * afterwards are not reflected in it. Confirm that callers only request the trie after
 * all local entries have been added.
 *
 * @return the cached or newly built trie of local entries
 */
public Trie getLocalEntriesTrie() {

    if (localEntriesTrie != null) {
        return localEntriesTrie;
    }
    localEntriesTrie = buildTrieFromStrings(localEntries);
    return localEntriesTrie;
}
public Set<String> getFalseRecommendationValues() {
return falseRecommendations.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors
.toSet());
/**
 * Returns the search trie for {@code entries}. The field is {@code transient}, so it is
 * rebuilt here whenever it is found to be {@code null}.
 *
 * @return the cached or newly built trie of dictionary entries
 */
public Trie getEntriesTrie() {

    if (entriesTrie != null) {
        return entriesTrie;
    }
    entriesTrie = buildTrie(entries);
    return entriesTrie;
}
/**
 * Returns the search trie for {@code falsePositives}. The field is {@code transient}, so
 * it is rebuilt here whenever it is found to be {@code null}.
 *
 * @return the cached or newly built trie of false-positive entries
 */
public Trie getFalsePositivesTrie() {

    if (falsePositivesTrie != null) {
        return falsePositivesTrie;
    }
    falsePositivesTrie = buildTrie(falsePositives);
    return falsePositivesTrie;
}
/**
 * Returns the search trie for {@code falseRecommendations}. The field is
 * {@code transient}, so it is rebuilt here whenever it is found to be {@code null}.
 *
 * This getter must operate on {@code falseRecommendationsTrie} only; touching
 * {@code falsePositivesTrie} here would either hand out the wrong trie or overwrite the
 * false-positives trie with false-recommendation keywords.
 *
 * @return the cached or newly built trie of false-recommendation entries
 */
public Trie getFalseRecommendationsTrie() {

    if (falseRecommendationsTrie == null) {
        this.falseRecommendationsTrie = buildTrie(this.falseRecommendations);
    }
    return falseRecommendationsTrie;
}
/**
 * Builds an Aho-Corasick trie from plain string keywords.
 *
 * The case-insensitivity flag is applied BEFORE the keywords are added: the builder
 * inserts keywords as they are added, so enabling {@code ignoreCase()} afterwards would
 * leave mixed-case keywords un-normalised and unable to match.
 *
 * @param entries keywords to index
 * @return the built trie, case-insensitive if this dictionary is case-insensitive
 */
private Trie buildTrieFromStrings(Set<String> entries) {

    var builder = Trie.builder();
    if (this.isCaseInsensitive()) {
        builder.ignoreCase();
    }
    return builder.addKeywords(entries).build();
}
/**
 * Builds an Aho-Corasick trie from the values of all dictionary entries that are not
 * marked as deleted.
 *
 * The case-insensitivity flag is applied BEFORE the keywords are added: the builder
 * inserts keywords as they are added, so enabling {@code ignoreCase()} afterwards would
 * leave mixed-case keywords un-normalised and unable to match.
 *
 * @param values dictionary entries to index; deleted entries are skipped
 * @return the built trie, case-insensitive if this dictionary is case-insensitive
 */
private Trie buildTrie(Set<DictionaryEntry> values) {

    var builder = Trie.builder();
    if (this.isCaseInsensitive()) {
        builder.ignoreCase();
    }
    var keywords = values.stream()
            .filter(e -> !e.isDeleted())
            .map(DictionaryEntry::getValue)
            .collect(Collectors.toList());
    return builder.addKeywords(keywords).build();
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
@ -11,6 +12,9 @@ import java.util.regex.Pattern;
public class SearchableText {
@JsonIgnore
private transient String stringRepresentation;
private final List<TextPositionSequence> sequences = new ArrayList<>();
@ -183,10 +187,16 @@ public class SearchableText {
/**
 * Builds the string form of this text from {@code sequences} on every call;
 * no caching is done here.
 *
 * @return the result of {@code buildString(sequences)}
 */
@Override
public String toString() {

    String built = this.buildString(this.sequences);
    return built;
}
/**
 * Returns the string form of this text, computing it with {@code buildString(sequences)}
 * on first access and reusing the stored result afterwards.
 *
 * NOTE(review): the cached value is never invalidated in this method; if
 * {@code sequences} can still change after the first call, the result will be stale.
 * Confirm against callers.
 *
 * @return the cached string representation
 */
public String asString() {

    if (stringRepresentation != null) {
        return stringRepresentation;
    }
    stringRepresentation = buildString(sequences);
    return stringRepresentation;
}
public String buildString(List<TextPositionSequence> sequences) {

View File

@ -1,34 +1,26 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.redaction.v1.model.ArgumentType;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
import lombok.Builder;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@Data
@Slf4j
@ -69,11 +61,13 @@ public class Section {
private List<FileAttribute> fileAttributes = new ArrayList<>();
@SuppressWarnings("unused")
@WhenCondition
public void addAiEntities(String type, String asType) {
Set<Entity> entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet());
Set<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toSet());
Set<Entity> found = EntitySearchUtils.findEntities(searchText, values, dictionary.getType(asType), headline, sectionNumber, false, false, Engine.NER, true, true);
List<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toList());
Set<Entity> found = EntitySearchUtils.findEntities(searchText, values, dictionary.getType(asType), new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
Set<Entity> finalResult = new HashSet<>();
@ -98,7 +92,8 @@ public class Section {
nerEntities.removeAll(entitiesOfType);
}
@SuppressWarnings("unused")
@WhenCondition
public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType, int minPartMatches, boolean allowDuplicateTypes) {
Set<String> combineSet = Set.of(combineTypes.split(","));
@ -160,35 +155,35 @@ public class Section {
}
}
/**
 * Rule condition: tells whether a file attribute with the given id has exactly the given value.
 *
 * @param id    compared with {@code attribute.getId()}
 * @param value compared case-sensitively with {@code attribute.getValue()}
 * @return {@code true} if {@code fileAttributes} is non-null and at least one attribute matches both
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {

    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute attribute : fileAttributes) {
        if (id.equals(attribute.getId()) && value.equals(attribute.getValue())) {
            return true;
        }
    }
    return false;
}
/**
 * Rule condition: tells whether a file attribute with the given placeholder has exactly the given value.
 *
 * @param placeholder compared with {@code attribute.getPlaceholder()}
 * @param value       compared case-sensitively with {@code attribute.getValue()}
 * @return {@code true} if {@code fileAttributes} is non-null and at least one attribute matches both
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {

    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute attribute : fileAttributes) {
        if (placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue())) {
            return true;
        }
    }
    return false;
}
/**
 * Rule condition: tells whether a file attribute with the given label has exactly the given value.
 *
 * @param label compared with {@code attribute.getLabel()}
 * @param value compared case-sensitively with {@code attribute.getValue()}
 * @return {@code true} if {@code fileAttributes} is non-null and at least one attribute matches both
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {

    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute attribute : fileAttributes) {
        if (label.equals(attribute.getLabel()) && value.equals(attribute.getValue())) {
            return true;
        }
    }
    return false;
}
/**
 * Rule condition: tells whether a file attribute with the given id has the given value,
 * ignoring case differences in the value.
 *
 * @param id    compared with {@code attribute.getId()} (case-sensitive)
 * @param value compared with {@code attribute.getValue()} via {@code equalsIgnoreCase}
 * @return {@code true} if {@code fileAttributes} is non-null and at least one attribute matches both
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {

    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute attribute : fileAttributes) {
        if (id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue())) {
            return true;
        }
    }
    return false;
}
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {
@ -196,14 +191,14 @@ public class Section {
.anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equalsIgnoreCase(attribute.getValue()));
}
/**
 * Rule condition: tells whether a file attribute with the given label has the given value,
 * ignoring case differences in the value.
 *
 * @param label compared with {@code attribute.getLabel()} (case-sensitive)
 * @param value compared with {@code attribute.getValue()} via {@code equalsIgnoreCase}
 * @return {@code true} if {@code fileAttributes} is non-null and at least one attribute matches both
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {

    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute attribute : fileAttributes) {
        if (label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue())) {
            return true;
        }
    }
    return false;
}
@SuppressWarnings("unused")
@WhenCondition
public boolean hasTableHeader(@Argument(ArgumentType.STRING) String headerName) {
@ -211,37 +206,37 @@ public class Section {
return tabularData != null && tabularData.containsKey(cleanHeaderName);
}
/**
 * Rule condition: tells whether {@code nerEntities} contains a non-ignored entity of the given type.
 *
 * @param type type to compare with {@code entity.getType()}
 * @return {@code true} if at least one entity is not ignored and has that type
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean aiMatchesType(@Argument(ArgumentType.TYPE) String type) {

    for (var entity : nerEntities) {
        if (!entity.isIgnored() && entity.getType().equals(type)) {
            return true;
        }
    }
    return false;
}
/**
 * Rule condition: tells whether {@code entities} contains a non-ignored entity of the given type.
 *
 * @param type type to compare with {@code entity.getType()}
 * @return {@code true} if at least one entity is not ignored and has that type
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean matchesType(@Argument(ArgumentType.TYPE) String type) {

    for (var entity : entities) {
        if (!entity.isIgnored() && entity.getType().equals(type)) {
            return true;
        }
    }
    return false;
}
/**
 * Rule condition: tells whether {@code images} contains a non-ignored image of the given type.
 *
 * @param type type to compare with {@code image.getType()}
 * @return {@code true} if at least one image is not ignored and has that type
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean matchesImageType(@Argument(ArgumentType.TYPE) String type) {

    for (var image : images) {
        if (!image.isIgnored() && image.getType().equals(type)) {
            return true;
        }
    }
    return false;
}
/**
 * Rule condition: tells whether the section headline contains the given word,
 * compared case-insensitively via {@code StringUtils.containsIgnoreCase}.
 *
 * @param word text to look for in {@code headline}
 * @return the result of {@code StringUtils.containsIgnoreCase(headline, word)}
 */
@SuppressWarnings("unused")
@WhenCondition
public boolean headlineContainsWord(@Argument(ArgumentType.STRING) String word) {

    boolean headlineHasWord = StringUtils.containsIgnoreCase(headline, word);
    return headlineHasWord;
}
@SuppressWarnings("unused")
@WhenCondition
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive){
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive) {
var compiledPattern = Patterns.getCompiledPattern(regEx, patternCaseInsensitive);
@ -250,7 +245,7 @@ public class Section {
return matcher.find();
}
@SuppressWarnings("unused")
@WhenCondition
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName, @Argument(ArgumentType.STRING) String value) {
@ -259,18 +254,20 @@ public class Section {
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName).toString().equals(value);
}
@SuppressWarnings("unused")
@ThenAction
public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
expandByPrefixRegEx(type, prefixPattern, patternCaseInsensitive, group, null);
}
@ThenAction
@SuppressWarnings("unused")
public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.REGEX) String valuePattern) {
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.REGEX) String valuePattern) {
if (StringUtils.isEmpty(prefixPattern)) return;
@ -315,6 +312,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String suffixPattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
@ -323,6 +321,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String suffixPattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.REGEX) String valuePattern) {
@ -370,6 +369,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactImage(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -378,6 +378,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotImage(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
redactImage(type, ruleNumber, reason, null, false);
@ -385,6 +386,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redact(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -393,6 +395,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNot(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
redact(type, ruleNumber, reason, null, false);
@ -400,6 +403,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -409,6 +413,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.STRING) String reason) {
@ -418,6 +423,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -427,6 +433,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
@ -436,6 +443,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -445,6 +453,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason) {
@ -454,6 +463,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactLinesBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -463,6 +473,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotLinesBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason) {
@ -472,6 +483,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations, @Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -481,6 +493,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations, @Argument(ArgumentType.STRING) String reason) {
@ -489,6 +502,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactAndRecommendByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -498,6 +512,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotAndRecommendByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
@ -507,6 +522,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void addRecommendationByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType) {
@ -524,6 +540,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotAndReference(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REFERENCE_TYPE) String referenceType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
@ -542,6 +559,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactIfPrecededBy(@Argument(ArgumentType.STRING) String prefix, @Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -557,6 +575,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void addRedaction(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -564,13 +583,15 @@ public class Section {
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
}
/**
 * Rule action: removes from {@code entities} every entity that has the given type and
 * whose entity type is {@code EntityType.ENTITY}.
 *
 * @param type type to compare with {@code entity.getType()}
 */
@ThenAction
@SuppressWarnings("unused")
public void ignore(String type) {

    entities.removeIf(candidate -> {
        if (!candidate.getType().equals(type)) {
            return false;
        }
        return candidate.getEntityType().equals(EntityType.ENTITY);
    });
}
@ThenAction
@SuppressWarnings("unused")
public void ignoreRecommendations(String type) {
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.RECOMMENDATION));
@ -578,8 +599,9 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void expandToFalsePositiveByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.STRING) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -607,6 +629,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void addHintAnnotationByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType) {
@ -625,6 +648,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType) {
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE, false);
@ -633,6 +657,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -657,6 +682,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type) {
annotateCell(cellHeader, ruleNumber, type, false, false, null, null);
@ -684,8 +710,8 @@ public class Section {
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
String searchValue = caseInsensitive ? value.toLowerCase() : value;
Set<Entity> found = EntitySearchUtils.findEntities(text, Set.of(searchValue), dictionary.getType(asType), headline, sectionNumber, false, false, engine, false, asRecommendation);
Set<Entity> found = EntitySearchUtils.findEntities(text, List.of(searchValue), dictionary.getType(asType),
new FindEntityDetails(asType, headline, sectionNumber, false, false, engine, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY));
found.forEach(entity -> {
if (redacted) {
entity.setRedaction(true);

View File

@ -129,6 +129,10 @@ public class DictionaryService {
var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
oldModel.ifPresent(oldDictionaryModel -> {
});
// add old entries from existing DictionaryModel
oldModel.ifPresent(dictionaryModel -> entries.addAll(dictionaryModel.getEntries().stream().filter(
f -> !newValues.contains(f.getValue())).collect(Collectors.toList())
@ -146,7 +150,7 @@ public class DictionaryService {
falseRecommendations.addAll(newEntries.getFalseRecommendations());
return new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t
.isHint(), entries, falsePositives, falseRecommendations, new HashSet<>(), dossierId != null);
.isHint(), entries, falsePositives, falseRecommendations, dossierId != null);
})
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList());
@ -193,6 +197,7 @@ public class DictionaryService {
}
private float[] convertColor(String hex) {
Color color = Color.decode(hex);

View File

@ -1,18 +1,5 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
@ -21,22 +8,20 @@ import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Service
@ -86,6 +71,7 @@ public class EntityRedactionService {
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary);
}
if (!local && analyzeRequest.getManualRedactions() != null) {
var approvedForceRedactions = analyzeRequest.getManualRedactions().getForceRedactions().stream()
@ -124,6 +110,7 @@ public class EntityRedactionService {
}));
}
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(false)
.dictionaryTypes(dictionary.getTypes())
@ -142,6 +129,7 @@ public class EntityRedactionService {
.images(reanalysisSection.getImages())
.fileAttributes(analyzeRequest.getFileAttributes())
.build(), reanalysisSection.getSearchableText()));
}
Set<Entity> entities = new HashSet<>();
@ -200,18 +188,18 @@ public class EntityRedactionService {
/**
 * Copies the values a section collected in {@code getLocalDictionaryAdds()} into the
 * {@code localEntries} of the matching dictionary model.
 *
 * Keys for which the dictionary has no model, or whose model has no {@code localEntries}
 * set, are logged with a warning and skipped. Previously the warning was logged and the
 * missing object was dereferenced right afterwards, which ended in a
 * {@link NullPointerException}.
 *
 * @param analysedSection section whose local dictionary additions are applied
 * @param dictionary      dictionary whose models receive the additional local values
 */
private void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) {

    analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {

        var model = dictionary.getLocalAccessMap().get(key);
        if (model == null) {
            log.warn("Dictionary {} is null", key);
            return; // nothing to add the values to; continue with the next key
        }

        var localEntries = model.getLocalEntries();
        if (localEntries == null) {
            log.warn("Dictionary {} localEntries is null", key);
            return;
        }

        analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> localEntries.add(value));
    });
}
@ -221,18 +209,20 @@ public class EntityRedactionService {
List<Integer> cellStarts) {
Set<Entity> found = new HashSet<>();
String searchableString = searchableText.toString();
String searchableString = searchableText.asString();
if (StringUtils.isEmpty(searchableString)) {
return new Entities(new HashSet<>(), new HashSet<>());
}
String lowercaseInputString = searchableString.toLowerCase();
for (DictionaryModel model : dictionary.getDictionaryModels()) {
if (model.isCaseInsensitive()) {
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(lowercaseInputString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false));
} else {
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(searchableString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false));
}
var trie = local ? model.getLocalEntriesTrie() : model.getEntriesTrie();
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
trie, model, new FindEntityDetails(model.getType(),headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, local? EntityType.RECOMMENDATION: EntityType.ENTITY));
EntitySearchUtils.addOrAddEngine(found, entities);
}
Set<Entity> nerFound = new HashSet<>();

View File

@ -3,7 +3,9 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.stereotype.Service;
@ -91,7 +93,7 @@ public class ManualRedactionSurroundingTextService {
private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
List<Rectangle> toFindPositions) {
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false, EntityType.ENTITY);
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), value,new FindEntityDetails( "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, EntityType.ENTITY));
Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null);
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);

View File

@ -175,7 +175,7 @@ public class SectionTextBuilderService {
sectionText.getSectionAreas().add(sectionArea);
}
sectionText.setText(searchableText.toString());
sectionText.setText(searchableText.asString());
sectionText.setHeadline(headline);
sectionText.setSectionNumber(sectionNumber.intValue());
sectionText.setTable(false);

View File

@ -31,7 +31,7 @@ public class SurroundingWordsService {
if (dictionary != null && dictionary.isHint(entity.getType())) {
continue;
}
findSurroundingWords(entity, searchableText.toString(), entity.getStart(), entity.getEnd());
findSurroundingWords(entity, searchableText.asString(), entity.getStart(), entity.getEnd());
}
} catch (Exception e) {
log.warn("Could not get surrounding text!");
@ -47,7 +47,7 @@ public class SurroundingWordsService {
}
try {
String searchableString = searchableText.toString();
String searchableString = searchableText.asString();
if (cellstarts != null) {
for (int i = 0; i < cellstarts.size(); i++) {

View File

@ -1,108 +1,95 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import org.ahocorasick.trie.Trie;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@Slf4j
@UtilityClass
@SuppressWarnings("PMD")
public class EntitySearchUtils {
/**
 * Checks whether the section text contains at least one of the given
 * dictionary increment values, ignoring case.
 *
 * <p>A fresh Aho-Corasick trie is built from the values on every call. Any
 * occurrence counts; no word-boundary check is applied here.
 *
 * @param sectionText text of the section to scan
 * @param values      dictionary values to look for
 * @return {@code true} if any value occurs in the section text
 */
public boolean sectionContainsAny(String sectionText, Set<DictionaryIncrementValue> values) {

    List<String> keywords = values.stream()
            .map(DictionaryIncrementValue::getValue)
            .collect(Collectors.toList());
    Trie matcher = Trie.builder()
            .ignoreCase()
            .addKeywords(keywords)
            .build();

    String loweredText = sectionText.toLowerCase(Locale.ROOT);
    return matcher.containsMatch(loweredText);
}
String inputString = sectionText.toLowerCase(Locale.ROOT);
public Set<Entity> findEntities(String inputString, List<String> values, DictionaryModel type, FindEntityDetails details) {
for (DictionaryIncrementValue value : values) {
var builder = Trie.builder()
.addKeywords(values);
String cleanValue = value.getValue().toLowerCase(Locale.ROOT).trim();
if (cleanValue.length() <= 2) {
continue;
}
int startIndex;
int stopIndex = 0;
do {
startIndex = inputString.indexOf(cleanValue, stopIndex);
stopIndex = startIndex + cleanValue.length();
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
return true;
}
} while (startIndex > -1);
if (type.isCaseInsensitive()) {
builder.ignoreCase();
}
return false;
return findEntities(inputString, builder.build(), type, details);
}
public Set<Entity> findEntities(String inputString, Set<String> values, DictionaryModel type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
Engine engine, boolean ignoreMinLength, boolean asRecommendation) {
public Set<Entity> findEntities(String inputString, Trie trie, DictionaryModel type, FindEntityDetails details) {
Set<Entity> found = find(inputString, values, type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY);
Set<Entity> found = find(inputString, trie, details);
if(asRecommendation){
Set<Entity> falseRecommendations = find(inputString, type.getFalseRecommendationValues(), type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, EntityType.FALSE_RECOMMENDATION);
removeFalsePositives(found, falseRecommendations);
found.addAll(falseRecommendations);
} else {
Set<Entity> falsePositives = find(inputString, type.getFalsePositiveValues(), type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, EntityType.FALSE_POSITIVE);
removeFalsePositives(found, falsePositives);
found.addAll(falsePositives);
}
return found;
}
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
Engine engine, boolean ignoreMinLength, EntityType entityType) {
Set<Entity> found = new HashSet<>();
for (String value : values) {
String cleanValue = value.trim();
if (!ignoreMinLength && cleanValue.length() <= 2) {
continue;
}
int startIndex;
int stopIndex = 0;
do {
startIndex = inputString.indexOf(cleanValue, stopIndex);
stopIndex = startIndex + cleanValue.length();
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, entityType));
}
} while (startIndex > -1);
if (details.getEntityType() == EntityType.RECOMMENDATION) {
Set<Entity> falseRecommendations = find(inputString, type.getFalseRecommendationsTrie(), details.withEntityType(EntityType.FALSE_RECOMMENDATION));
removeFalsePositives(found, falseRecommendations);
found.addAll(falseRecommendations);
} else {
Set<Entity> falsePositives = find(inputString, type.getFalsePositivesTrie(), details.withEntityType(EntityType.FALSE_POSITIVE));
removeFalsePositives(found, falsePositives);
found.addAll(falsePositives);
}
return found;
}
/**
 * Finds all occurrences of a single value in the input string.
 *
 * <p>Builds a one-keyword trie and delegates to
 * {@code find(String, Trie, FindEntityDetails)} so that matching and boundary
 * validation live in exactly one place.
 *
 * @param inputString       text to search in
 * @param value             the value to look for (case-sensitive)
 * @param findEntityDetails metadata copied onto every entity that is created
 * @return the entities found; empty if there is no valid match
 */
public Set<Entity> find(String inputString, String value, FindEntityDetails findEntityDetails) {

    var singleValueTrie = Trie.builder()
            .addKeywords(value)
            .build();
    return find(inputString, singleValueTrie, findEntityDetails);
}
/**
 * Runs the given trie over the input string and turns every match that passes
 * the boundary validation into an entity.
 *
 * @param inputString       text to search in
 * @param trie              pre-built keyword trie
 * @param findEntityDetails metadata copied onto every entity that is created
 * @return the entities found; empty if there is no valid match
 */
public Set<Entity> find(String inputString, Trie trie, FindEntityDetails findEntityDetails) {

    Set<Entity> result = new HashSet<>();
    for (var emit : trie.parseText(inputString)) {
        // The match end is passed on as end + 1 because it is used as an exclusive substring bound.
        validateAndAddEntity(result, findEntityDetails, inputString, emit.getStart(), emit.getEnd() + 1);
    }
    return result;
}
/**
 * Adds an entity for the range {@code [startIndex, stopIndex)} of the input
 * string if the range sits on acceptable boundaries.
 *
 * <p>The match must start at the beginning of the text or directly after a
 * whitespace or separator character, and must end at the end of the text or
 * directly before a separator character.
 *
 * @param entities          set that receives the new entity
 * @param findEntityDetails metadata copied onto the entity
 * @param inputString       text the indices refer to
 * @param startIndex        inclusive start of the match
 * @param stopIndex         exclusive end of the match
 */
private void validateAndAddEntity(Set<Entity> entities, FindEntityDetails findEntityDetails, String inputString, int startIndex, int stopIndex) {

    if (startIndex <= -1) {
        return;
    }

    if (startIndex != 0) {
        char preceding = inputString.charAt(startIndex - 1);
        if (!Character.isWhitespace(preceding) && !isSeparator(preceding)) {
            return;
        }
    }

    if (stopIndex != inputString.length() && !isSeparator(inputString.charAt(stopIndex))) {
        return;
    }

    String word = inputString.substring(startIndex, stopIndex);
    entities.add(new Entity(word,
            findEntityDetails.getType(),
            startIndex,
            stopIndex,
            findEntityDetails.getHeadline(),
            findEntityDetails.getSectionNumber(),
            findEntityDetails.isDictionaryEntry(),
            findEntityDetails.isDossierDictionary(),
            findEntityDetails.getEngine(),
            findEntityDetails.getEntityType()));
}
private boolean isSeparator(char c) {
@ -140,7 +127,6 @@ public class EntitySearchUtils {
}
public void removeFalsePositives(Set<Entity> entities, Set<Entity> falsePositives) {
List<Entity> wordsToRemove = new ArrayList<>();
@ -148,17 +134,15 @@ public class EntitySearchUtils {
for (Entity inner : entities) {
if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
wordsToRemove.add(inner);
wordsToRemove.add(inner);
}
}
}
entities.removeAll(wordsToRemove);
wordsToRemove.forEach(entities::remove);
entities.removeAll(falsePositives);
}
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
List<Entity> wordsToRemove = new ArrayList<>();
@ -216,17 +200,17 @@ public class EntitySearchUtils {
if (entities.contains(found)) {
Optional<Entity> existingOptional = entities.stream().filter(entity -> entity.equals(found)).findFirst();
if (!existingOptional.isPresent()) {
if (existingOptional.isEmpty()) {
return;
}
var existing = existingOptional.get();
if (existing.getType().equals(found.getType())) {
existing.getEngines().addAll(found.getEngines());
if(existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY)
|| existing.getEntityType().equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)){
if (existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY)
|| existing.getEntityType().equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)) {
existing.setEntityType(EntityType.ENTITY);
if(found.isRedaction()){
if (found.isRedaction()) {
existing.setRedaction(true);
}
}
@ -282,7 +266,7 @@ public class EntitySearchUtils {
for (Entity toAdd : toBeAdded) {
if (existing.contains(toAdd)) {
Optional<Entity> existingOptional = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst();
if (!existingOptional.isPresent()) {
if (existingOptional.isEmpty()) {
return;
}
var existingEntity = existingOptional.get();

View File

@ -0,0 +1,24 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * Bundles the metadata that is copied onto every {@code Entity} created by an
 * entity search, so it does not have to be passed as a long parameter list.
 */
@Data
@AllArgsConstructor
public class FindEntityDetails {

    private String type;
    private String headline;
    private int sectionNumber;
    private boolean isDictionaryEntry;
    private boolean isDossierDictionary;
    private Engine engine;
    private EntityType entityType;


    /**
     * Returns a copy of these details with a different entity type.
     *
     * <p>The receiver is left untouched. The previous implementation mutated
     * {@code this}, so deriving details for a false-positive or
     * false-recommendation lookup silently changed the entity type of the
     * caller's object for every later use.
     *
     * @param entityType entity type of the returned copy
     * @return a new instance that differs only in its entity type
     */
    public FindEntityDetails withEntityType(EntityType entityType) {

        return new FindEntityDetails(type, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, entityType);
    }

}

View File

@ -44,6 +44,12 @@ public class RedactionStorageService {
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), objectMapper.writeValueAsBytes(any));
}
/**
 * Stores the content of the given stream under the storage id derived from
 * dossier, file and file type.
 *
 * @param dossierId   id of the dossier the file belongs to
 * @param fileId      id of the file
 * @param fileType    kind of artifact being stored
 * @param inputStream content to store
 */
@SneakyThrows
public void storeObject(String dossierId, String fileId, FileType fileType, InputStream inputStream) {

    var storageId = StorageIdUtils.getStorageId(dossierId, fileId, fileType);
    storageService.storeObject(storageId, inputStream);
}
public ImportedRedactions getImportedRedactions(String dossierId, String fileId) {

View File

@ -10,15 +10,14 @@ import org.springframework.core.io.InputStreamResource;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.HashMap;
import java.util.Map;
import java.io.InputStream;
import java.util.*;
public class FileSystemBackedStorageService extends S3StorageService {
public class FileSystemBackedStorageService implements StorageService{
private final Map<String, File> dataMap = new HashMap<>();
public FileSystemBackedStorageService() {
super(null, null);
}
@SneakyThrows
@ -33,16 +32,45 @@ public class FileSystemBackedStorageService extends S3StorageService {
}
/**
 * Removes the object with the given id and deletes its backing temp file.
 *
 * <p>Unknown ids are ignored. Previously only the map entry was dropped and
 * the temp file stayed on disk.
 *
 * @param objectId id of the object to delete
 */
@Override
public void deleteObject(String objectId) {

    File removed = dataMap.remove(objectId);
    if (removed != null) {
        removed.delete();
    }
}
/**
 * Reports whether an object is currently registered under the given id.
 *
 * @param objectId id to look up
 * @return {@code true} if the id is present in the in-memory map
 */
@Override
public boolean objectExists(String objectId) {
return dataMap.containsKey(objectId);
}
/**
 * Intentionally empty: this implementation performs no work on initialisation.
 */
@Override
public void init() {
}
/**
 * Lists the ids of all objects currently stored.
 *
 * @return a new list holding a snapshot of the stored object ids
 */
public List<String> listPaths(){
return new ArrayList<>(dataMap.keySet());
}
/**
 * Writes the given bytes to a new temp file and registers it under the id.
 *
 * <p>The output stream is closed before the file is registered; previously
 * the stream was never closed and leaked a file handle on every call.
 *
 * @param objectId id to register the data under; slashes are replaced so the
 *                 id can be used as the temp file suffix
 * @param data     content to store
 */
@SneakyThrows
@Override
public void storeObject(String objectId, byte[] data) {

    File tempFile = File.createTempFile("storage", objectId.replace("/", "-"));
    try (FileOutputStream out = new FileOutputStream(tempFile)) {
        IOUtils.write(data, out);
    }
    dataMap.put(objectId, tempFile);
}
/**
 * Copies the given stream to a new temp file and registers it under the id.
 *
 * <p>The output stream is closed before the file is registered; previously
 * the stream was never closed and leaked a file handle on every call. The
 * input stream is not closed here; that remains the caller's responsibility.
 *
 * @param objectId id to register the data under
 * @param stream   content to store
 */
@Override
@SneakyThrows
public void storeObject(String objectId, InputStream stream) {

    File tempFile = File.createTempFile("test", ".tmp");
    try (FileOutputStream out = new FileOutputStream(tempFile)) {
        IOUtils.copy(stream, out);
    }
    dataMap.put(objectId, tempFile);
}
public void clearStorage() {
this.dataMap.forEach((k, v) -> {
v.delete();

View File

@ -0,0 +1,95 @@
package com.iqser.red.service.redaction.v1.server.realdata;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.MessageType;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;
import java.io.FileOutputStream;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Runs a re-analysis against pre-computed fixtures of a real document after
 * simulating a dictionary increment, and dumps the resulting redaction log.
 */
@Slf4j
public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest {

    private static final String DOSSIER_ID = "dossierId";
    private static final String FILE_ID = "fileId";


    /**
     * Stores the fixtures, simulates a dictionary change for type "PII" and
     * triggers a REANALYSE request. The redaction log is written to /tmp for
     * manual inspection.
     */
    @Test
    @SneakyThrows
    public void testFile() {

        ObjectMapper om = new ObjectMapper();
        om.registerModule(new JavaTimeModule());

        storeResource("data/test-file.pdf", FileType.ORIGIN);
        storeResource("data/test-file.ner.json", FileType.NER_ENTITIES);
        storeResource("data/test-file.text.json", FileType.TEXT);
        storeResource("data/test-file.section-grid.json", FileType.SECTION_GRID);
        storeResource("data/test-file.redaction-log.json", FileType.REDACTION_LOG);

        // NOTE(review): the STRUCTURE_ANALYSE and ANALYSE phases are not executed here;
        // presumably the text, section-grid and redaction-log fixtures stored above
        // stand in for their output - confirm.
        AnalyzeRequest ar = AnalyzeRequest.builder()
                .fileId(FILE_ID)
                .dossierId(DOSSIER_ID)
                .analysisNumber(1)
                .dossierTemplateId("dossierTemplateId")
                .lastProcessed(OffsetDateTime.now())
                .excludedPages(Set.of())
                .fileAttributes(List.of())
                .messageType(MessageType.REANALYSE)
                .build();

        var txt = redactionStorageService.getText(DOSSIER_ID, FILE_ID);
        var totalText = txt.getSectionTexts().stream().map(SectionText::getText).collect(Collectors.joining("\n"));
        log.info("Total text length: {}", totalText.length());

        simulateIncrement(List.of("study"), "PII", 3L);

        redactionMessageReceiver.receiveAnalyzeRequest(om.writeValueAsString(ar), false);
        log.warn("done analyze");

        // Named differently from the Slf4j 'log' field, which the original local shadowed.
        var storedRedactionLog = redactionStorageService.getRedactionLog(DOSSIER_ID, FILE_ID);
        try (var out = new FileOutputStream("/tmp/test-file.redaction-log.json")) {
            om.writeValue(out, storedRedactionLog);
        }
        log.info("Redaction log entries: {}", storedRedactionLog.getRedactionLogEntry().size());
    }


    /** Stores one classpath fixture under the test dossier/file id and closes the stream afterwards. */
    @SneakyThrows
    private void storeResource(String resourcePath, FileType fileType) {

        try (var stream = new ClassPathResource(BASE_DIR + resourcePath).getInputStream()) {
            redactionStorageService.storeObject(DOSSIER_ID, FILE_ID, fileType, stream);
        }
    }

}

View File

@ -0,0 +1,192 @@
package com.iqser.red.service.redaction.v1.server.realdata;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.client.*;
import com.iqser.red.service.redaction.v1.server.queue.RedactionMessageReceiver;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.ResourcePatternResolver;
import org.springframework.test.context.junit4.SpringRunner;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.when;
/**
 * Base class for integration tests that run the redaction service against
 * dictionaries, rules and colors loaded from the classpath, with all remote
 * clients mocked and storage replaced by a file-system backed implementation.
 */
@RunWith(SpringRunner.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(LiveDataIntegrationTest.RedactionIntegrationTestConfiguration.class)
public class LiveDataIntegrationTest {

    protected static String BASE_DIR = "performance/";
    protected static String EFSA_SANITISATION_GFL_V1 = "dictionaries/EFSA_sanitisation_GFL_v1/";

    @MockBean
    protected DictionaryClient dictionaryClient;

    @MockBean
    protected EntityRecognitionClient entityRecognitionClient;

    @MockBean
    private FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;

    @MockBean
    private LegalBasisClient legalBasisClient;

    @Autowired
    private ResourcePatternResolver resourcePatternResolver;

    @MockBean
    private RulesClient rulesClient;

    @Autowired
    protected DictionaryService dictionaryService;

    @Autowired
    protected RedactionStorageService redactionStorageService;

    @Autowired
    protected RedactionMessageReceiver redactionMessageReceiver;

    @Autowired
    protected FileSystemBackedStorageService fileSystemBackedStorageService;

    private List<Type> types;


    @Configuration
    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
    public static class RedactionIntegrationTestConfiguration {

        @Bean
        @Primary
        public StorageService inmemoryStorage() {

            return new FileSystemBackedStorageService();
        }

    }


    /**
     * Wires the mocked clients to the classpath fixtures (rules, types, colors,
     * dictionary entries) and loads the dictionary once at version 1.
     */
    @SneakyThrows
    @Before
    public void prepareTest() {

        when(dictionaryClient.getVersion(anyString())).thenReturn(1L);
        when(dictionaryClient.getVersionForDossier(anyString())).thenReturn(1L);

        String rules;
        try (var rulesStream = new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "rules.drl").getInputStream()) {
            // Explicit charset: the single-argument overload uses the platform default.
            rules = IOUtils.toString(rulesStream, java.nio.charset.StandardCharsets.UTF_8);
        }
        when(rulesClient.getRules(any())).thenReturn(JSONPrimitive.of(rules));

        ObjectMapper objectMapper = new ObjectMapper();
        var jsonNode = objectMapper.readTree(new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "types.json").getInputStream());
        types = objectMapper.treeToValue(jsonNode.get("types"), objectMapper.getTypeFactory().constructType(
                new TypeReference<List<Type>>() {
                }));

        types.forEach(t -> {
            t.setId(t.getType());
            t.setVersion(0L);
        });

        when(dictionaryClient.getAllTypesForDossierTemplate(anyString(), anyBoolean())).thenReturn(types);
        when(dictionaryClient.getAllTypesForDossier(anyString(), anyBoolean())).thenReturn(new ArrayList<>());
        when(dictionaryClient.getColors(anyString())).thenReturn(objectMapper.readValue(new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "colors.json").getInputStream(), Colors.class));

        when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
            String typeName = answer.getArgument(0);
            var type = findType(typeName);
            if (type == null) {
                return null;
            }
            type.setEntries(getEntries(typeName, type.getTypeId()));
            return type;
        });

        dictionaryService.updateDictionary("dossierTemplateId", "dossierId");
    }


    /**
     * Re-wires the dictionary client so that the next update sees a new version
     * in which only {@code deltaTypeName} carries entries (the given values);
     * every other known type is returned with an empty entry list.
     *
     * @param values        entries reported for the changed type
     * @param deltaTypeName name of the type that changed (compared case-insensitively)
     * @param version       version reported by the client and set on the new entries
     */
    public void simulateIncrement(List<String> values, String deltaTypeName, long version) {

        when(dictionaryClient.getVersion(anyString())).thenReturn(version);
        when(dictionaryClient.getVersionForDossier(anyString())).thenReturn(version);

        when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
            String typeName = answer.getArgument(0);
            var type = findType(typeName);
            if (type == null) {
                return null;
            }
            if (deltaTypeName.equalsIgnoreCase(typeName)) {
                type.setEntries(values.stream().map(l -> new DictionaryEntry(0, l, version, false, type.getTypeId())).collect(Collectors.toList()));
            } else {
                type.setEntries(new ArrayList<>());
            }
            return type;
        });
    }


    @Test
    public void testUpdateDictionary() {

        dictionaryService.updateDictionary("dossierTemplateId", "dossierId");
        var dict = dictionaryService.getDeepCopyDictionary("dossierTemplateId", "dossierId");
        assertThat(dict.getLocalAccessMap().size()).isEqualTo(12);
    }


    /** Returns the loaded type whose name matches case-insensitively, or {@code null} if there is none. */
    private Type findType(String typeName) {

        return types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst().orElse(null);
    }


    /**
     * Reads the entries of the first dictionary resource whose file name
     * contains the type name, one entry per line.
     *
     * @return the entries, or an empty list if no resource matches
     */
    @SneakyThrows
    private List<DictionaryEntry> getEntries(String typeName, String typeId) {

        Resource[] dictionaryResources = resourcePatternResolver.getResources("classpath:" + BASE_DIR + EFSA_SANITISATION_GFL_V1 + "dictionaries/**");
        for (var resource : dictionaryResources) {
            String filename = resource.getFilename();
            // getFilename() may return null for resources without a file name.
            if (filename != null && filename.contains(typeName)) {
                try (var stream = resource.getInputStream()) {
                    List<String> lines = IOUtils.readLines(stream, java.nio.charset.StandardCharsets.UTF_8);
                    return lines.stream().map(l -> new DictionaryEntry(0, l, 0L, false, typeId)).collect(Collectors.toList());
                }
            }
        }
        return new ArrayList<>();
    }

}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,12 @@
{
"defaultColor": "#9398a0",
"requestAdd": "#04b093",
"requestRemove": "#04b093",
"notRedacted": "#c498fa",
"analysisColor": "#dd4d50",
"updatedColor": "#fdbd00",
"dictionaryRequestColor": "#5b97db",
"manualRedactionColor": "#9398a0",
"previewColor": "#9398a0",
"ignoredHintColor": "#e7d4ff"
}

View File

@ -0,0 +1,21 @@
5 Batch
Author
Batch Analysis
Certificate of Analysis
Chemical Investigator
Contributors
Five Batch
GLP Certificate
Pathologist
Performing Laboratory
Principle Investigator
Quality Assurance
References
Report of Peer Review
Study Director
Study Monitor
Test Facility
Test Site
Trial Site
altitude
latitude

View File

@ -0,0 +1,234 @@
. Comparative Biochemistry and Physiology Part C: Toxicology & Pharmacology C Toxicology & Pharmacology
. J. Exp. Zool
. Lewis Publ., Boca Raton,
1944; 82:377-390
7th ed.; The Iowa State University Press: Ames, IA
7th ed.; The Joint Nature ConservationIowa State University Press: Ames, IAReproduction, Fertility and Development 2
ATLA
Academic Press
Advances in
Ageing Dev.
Agromedicine
Am J Epidemiol
Am J Ind Med
Am J Med Genet A
Am J Respir Crit Care Med
American Society for Testing and Materials, Philadelphia, Pennsylvania.
Anal Chem.
Anal, Chem.,
Animal Reproduction Science
Annals of
Annals of Maths Stats
Apidologie
Aq. Toxicol.
Aquatic Ecology
Aquatic Toxicology
Aquatic toxicology
Arch Environ Contam Toxicol
Arch Toxicol
Arch Toxicol.
Archives of Environmental Contamination and Toxicology
Archives of Toxicology
Australasian Journal of Ecotoxicology
BioFactors
Biomedical chromatography
Biometrics
Biometrika
Birth Defects Res. B. Dev. Reprod. Toxicol
Birth Defects Res. B. Dev. Reprod. Toxicol.
Br Med J
British Health and Safety Executive, Chemicals Regulation Directorate Pesticides
Brown Company Publishers, Dubuque, Iowa
Bull Environ Contam Toxicol.
Bull. Environ. Contam. Toxicol
Bulletin
Bulletin of Environmental Contamination and Toxicology
CHEMOSPHERE
Cahiers Agricultures
Cancer Letters
Cancer Res
Carcinogenesis
Chem. Res. Toxicol.
Chem. Res. in Toxicol.
Chemosphere
Chinese Journal of Chromatography
Clin Anat.
Comparative Biochemistry and Physiology Part C: Toxicology & Pharmacology C Toxicology & Pharmacology
Crit Rev Toxicol
Critical Reviews in Toxicology
Curr. Med. Chem.
Current approaches in the statistical analysis of ecotoxicity data: guidance to application
DEFRA
Deutsche Lebensmittel-Rundschau
Deutsche Lebensmittel-Rundschau.
Dongbei Nongye Daxue Xuebao
Dual-Gold
EFSA Journal
Ecological Research
Ecotoxicol. Environ. Safety
Ecotoxicology and Environmental Safety
Endocrinology
Env. Mol. Mutagen
Environ Health Perspect
Environ Health Perspect.
Environ Health Perspect. 1
Environ Mutat
Environ Sci Pollut Res
Environ Sci Technol
Environ Toxicol.
Environ and Molecular Mutagenesis
Environ. Health Perspec.
Environ. Health Perspect.
Environ. Sci. Technol
Environ. Sci. Technol.
Environment International
Environmental & Molecular Mutagenesis
Environmental Health
Environmental Health Perspectives
Environmental Monitoring and Assessment
Environmental Pollution
Environmental Protection Agency Federal Register
Environmental Science & Technology
Environmental Science and Technology
Environmental Science and Technology.
Environmental Toxicology and Chemistry
Environmental monitoring and assessment
Environmental toxicology and chemistry
Essays in Honor of Harold Hotelling
Eur Respir
Experimental Cel] Research
FUNDAMENTAL AND APPLIED TOXICOLOGY
Fischer, Jena
Fish Sci
Food Cosmet. Toxicol.
Franckh, Stuttgar
Franckh, Stuttgart
Free Rad. Res
Fundamentals of Applied Toxicology
Fundamentals of Applied Toxicology1988
Gen. Comp. Endocrinol
Gen. Comp.Endocrinol
Gen.Com. Endocrinol.
General and Comparative Endocrinology
Greenhouse Pest Management
Gustav Fischer Verlag, Stuttgart
Handbook of water purity and quality.
Health and Safety Executive, Chemi-cals Regulation Directorate Pesticides
Herpetologica
High-Throughput Screening Methods in Toxicity Testing
High-Throughput Screening Methods in Toxicity Testing. Hoboken, NJ: John Wiley & Sons
ISBN
ISSN
ISSN/ISBN
Ibis
Int Arch Occup Environ Health
Int J Cancer
Irish Journal of Agricultural and Food Research
Ist Annual Symposium on Aquatic Toxicology ASTM,
J Econom
J Endocrinol
J LARGILLIERE
J Radiat Res
J Steroid Biochem Mol Biol
J. Invest. Derm
Joint Nature Conservation Committee
Journal Pharmacology Experimental Therapeutics
Journal of
Journal of Animal Ecology
Journal of Applied Ecology
Journal of Applied Entomology
Journal of Economic Entomology
Journal of Environmental Science and Health
Journal of Experimental Biology and Ecology
Journal of Hazardous Materials
Journal of Microbiology
Journal of Toxicology and
Journal of agricultural and food chemistry
Journal of the American College of Toxicology
Journal of the Royal Statistical Society,
Leaflet
Limnologica
Marine Enironmental Research
Marine Environmental Research
Marine Pollution Bulletin
Mass Spectrometry
Medical Journal
Middle Atlantic Reproduction and Teratology Association
Mol Mutagen
Mol. Cell. Endocrinol.
Mutagenesis
Mutat Res
Mutation Research
National Academy of Science,
Nature
Neurotoxicity research
Nonparametric Statistics for the Behavioral Sciences
OECD Guideline for the testing of chemicals
OECD Guidelines for Testing of Chemicals
OECD Guidelines for testing of chemicals.
OECD Principles of Good Laboratory Practice and Compliance Monitoring
OECD Series on Testing Assessment
Occup Environ Med
Occupational and Environmental Medicine
Office of Prevention, Pesticides and Toxic Substances
Official Journal of the European Communities
Oxford University Press
PLoS ONE
Pediat. Res
Pesticide Biochemistry and Physiology
Physiol Chem
Planta daninha
Press
Principles and Procedures of Statistics
Principles and Procedures of Statistics, A Biometrical Approach
Proc Natl Acad Sci USA
Psychopharmacologia
Publication
Publications Office of the European Union, Luxembourg
Published: Y
Publisher Gustav Fischer, Stuttgart
Raven press,
Reg. Tox. Pharmacol
Regul. Toxicol. Pharmacol
Regulatory Toxicology and Pharmacology
Reproductive BioMedicine Online
Reproductive Toxicology
Schweizerbartsche Verlagsbuchhandlung
Science of the Total Environment
Scientific Reports
Stain Technol
Superorder Monogonnata
Swiss Ordinance relating to Good Laboratory Practice
Teratology
The American Statistician
The Chemical Society,
The Inland Waters Volume
The Royal Society for the Protection of Birds
Tidepool Scientific Software, Mckinleyville, CA, USA
Toxicol Chem
Toxicol Pathol.
Toxicol Sci
Toxicol Sci.
Toxicol Sci. 1
Toxicol in Vitro
Toxicol. Sci,
Toxicol. Sci.
Toxicol. in Vitro
Toxicologic Pathology
Toxicological Sciences
Toxicological and Environmental Chemistry
Toxicology Letters
Toxicology and Applied Pharmacology
Toxicology and Industrial Health
Toxicology in Vitro
Trends in Neurosciences
Umweltbundesamt Development & Research Project
WATER CHLORINATION:CHEM, ENVIRON. IMPACT HEALTH EFF., PROC. CONF
Water Res.
Workshop
World Journal of Pharmaceutical Research
XIII Symposium Pesticide Chemistry
edition
http://www.iobc-wprs.org
ournal of the Royal Statistical Society
paper was presented

View File

@ -0,0 +1,22 @@
{
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"name": "EFSA sanitisation GFL v1",
"description": "Redact all authors, laboratory addresses and other geolocation.",
"dateAdded": "2021-01-21T14:25:35Z",
"dateModified": "2021-07-19T08:09:27Z",
"createdBy": "e3aed6ea-a9e5-4f3f-bde7-6f0fe0c4362c",
"modifiedBy": "e3aed6ea-a9e5-4f3f-bde7-6f0fe0c4362c",
"validFrom": null,
"validTo": null,
"downloadFileTypes": [
"PREVIEW",
"REDACTED",
"DELTA_PREVIEW"
],
"reportTypes": [
"EXCEL_MULTI_FILE",
"WORD_SINGLE_FILE_APPENDIX_A1_TEMPLATE",
"WORD_SINGLE_FILE_APPENDIX_A2_TEMPLATE"
],
"dossierTemplateStatus": "ACTIVE"
}

View File

@ -0,0 +1,47 @@
[
{
"name": "1.1 personal data (incl. geolocation); Article 39(e)(3)",
"description": "(Regulations (EU) 2016/679 and (EU) 2018/1725 shall apply to the processing of personal data carried out pursuant to this Regulation. Any personal data made public pursuant to Article 38 of this Regulation and this Article shall only be used to ensure the transparency of the risk assessment under this Regulation and shall not be further processed in a manner that is incompatible with these purposes, in accordance with point (b) of Article 5(1) of Regulation (EU) 2016/679 and point (b) of Article 4(1) of Regulation (EU) 2018/1725, as the case may be)",
"reason": "Article 39(e)(3) of Regulation (EC) No 178/2002"
},
{
"name": "1.2 vertebrate study related personal data (incl. geolocation); Article 39(e)(2)",
"description": "personal data (names and addresses) of individuals involved in testing on vertebrate studies or in obtaining toxicological information",
"reason": "Article 39(e)(2) of Regulation (EC) No 178/2002"
},
{
"name": "2. manufacturing or production process",
"description": "the manufacturing or production process, including the method and innovative aspects thereof, as well as other technical and industrial specifications inherent to that process or method, except for information which is relevant to the assessment of safety",
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
},
{
"name": "3. links between a producer and applicant",
"description": "commercial links between a producer or importer and the applicant or the authorisation holder, where applicable",
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
},
{
"name": "4. commercial information",
"description": "commercial information revealing sourcing, market shares or business strategy of the applicant",
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
},
{
"name": "5. quantitative composition",
"description": "quantitative composition of the subject matter of the request, except for information which is relevant to the assessment of safety",
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
},
{
"name": "6. specification of impurity",
"description": "the specification of impurity of the active substance and the related methods of analysis for impurities in the active substance as manufactured, except for the impurities that are considered to be toxicologically, ecotoxicologically or environmentally relevant and the related methods of analysis for such impurities",
"reason": "Article 63(2)(b) of Regulation (EC) No 1107/2009"
},
{
"name": "7. results of production batches",
"description": "results of production batches of the active substance including impurities",
"reason": "Article 63(2)(c) of Regulation (EC) No 1107/2009"
},
{
"name": "8. composition of a plant protection product",
"description": "information on the complete composition of a plant protection product",
"reason": "Article 63(2)(d) of Regulation (EC) No 1107/2009"
}
]

View File

@ -0,0 +1,480 @@
package drools
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
global Section section
// --------------------------------------- CBI rules -------------------------------------------------------------------
// Fires when the section reports an AI match of type "CBI_author" and calls
// section.addAiEntities with "CBI_author" for both arguments
// (presumably AI type and target entity type -- confirm against Section).
rule "0: Add CBI_author from ai"
    when
        Section(aiMatchesType("CBI_author"))
    then
        section.addAiEntities("CBI_author", "CBI_author");
end
// Fires when the section reports an AI match of type "ORG" and asks the section
// to combine "ORG" with the listed address-like AI types into "CBI_address".
// NOTE(review): despite the rule name, the type passed here is CBI_address, not CBI_author.
// NOTE(review): the meaning of 20, 3 and false is defined by Section.combineAiTypes -- confirm there.
rule "0: Combine ai types CBI_author from ai"
    when
        Section(aiMatchesType("ORG"))
    then
        section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
// Non-vertebrate variant: the file attribute "Vertebrate Study" is not "Yes"
// and the section matches type CBI_author. Redacts CBI_author as rule 1,
// citing Article 39(e)(3).
rule "1: Redact CBI Authors (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("CBI_author")
        )
    then
        section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: the file attribute "Vertebrate Study" is "Yes" and the
// section matches type CBI_author. Redacts CBI_author as rule 2,
// citing Article 39(e)(2).
rule "2: Redact CBI Authors (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("CBI_author")
        )
    then
        section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant for addresses: when the section matches CBI_address,
// calls redactNot for that type as rule 3 and then ignoreRecommendations
// for the same type.
rule "3: Redact not CBI Address (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("CBI_address")
        )
    then
        section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
        section.ignoreRecommendations("CBI_address");
end
// Vertebrate variant for addresses: when the section matches CBI_address,
// redacts that type as rule 4, citing Article 39(e)(2).
rule "4: Redact CBI Address (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("CBI_address")
        )
    then
        section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// For sections matching CBI_author, passes a regex of "apostrophe-like character
// followed by s" to expandToFalsePositiveByRegEx, so genitive forms are handled
// as false positives (per the rule name).
// NOTE(review): the character class repeats the plain apostrophe; typographic
// quote variants may have been lost in an encoding step -- verify.
rule "5: Do not redact genitive CBI_author"
    when
        Section(matchesType("CBI_author"))
    then
        section.expandToFalsePositiveByRegEx("CBI_author", "[''ʼˈ´`ʻ']s", false, 0);
end
// Non-vertebrate variant: the table has an "Author(s)" header but no
// "Vertebrate study Y/N" header. Redacts the "Author(s)" cell as CBI_author
// (rule 6, Article 39(e)(3)).
rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && hasTableHeader("Author(s)")
            && !hasTableHeader("Vertebrate study Y/N")
        )
    then
        section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: the table has an "Author(s)" header but no
// "Vertebrate study Y/N" header. Redacts the "Author(s)" cell as CBI_author
// (rule 7, Article 39(e)(2)).
rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && hasTableHeader("Author(s)")
            && !hasTableHeader("Vertebrate study Y/N")
        )
    then
        section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: the table has an "Author" header but no
// "Vertebrate study Y/N" header. Redacts the "Author" cell as CBI_author
// (rule 8, Article 39(e)(3)).
rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && hasTableHeader("Author")
            && !hasTableHeader("Vertebrate study Y/N")
        )
    then
        section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: the table has an "Author" header but no
// "Vertebrate study Y/N" header. Redacts the "Author" cell as CBI_author
// (rule 9, Article 39(e)(2)).
rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && hasTableHeader("Author")
            && !hasTableHeader("Vertebrate study Y/N")
        )
    then
        section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: the row's "Vertebrate study Y/N" column equals one of
// Y / Yes / N / No. Redacts the "Author(s)" cell as CBI_author (rule 10,
// Article 39(e)(3)). Unlike rules 6-9, the boolean argument is true here
// (presumably "also add as recommendation", per the rule name -- confirm).
rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (rowEquals("Vertebrate study Y/N", "Y")
                || rowEquals("Vertebrate study Y/N", "Yes")
                || rowEquals("Vertebrate study Y/N", "N")
                || rowEquals("Vertebrate study Y/N", "No"))
        )
    then
        section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: the row's "Vertebrate study Y/N" column equals one of
// Y / Yes / N / No. Redacts the "Author(s)" cell as CBI_author (rule 11,
// Article 39(e)(2)). The boolean argument is true here
// (presumably "also add as recommendation", per the rule name -- confirm).
rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (rowEquals("Vertebrate study Y/N", "Y")
                || rowEquals("Vertebrate study Y/N", "Yes")
                || rowEquals("Vertebrate study Y/N", "N")
                || rowEquals("Vertebrate study Y/N", "No"))
        )
    then
        section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Cheap pre-filter: only sections whose search text contains "BL" or "CT" are
// run through the regex below. The pattern targets codes beginning with BL or
// CT/CTL, including variants such as "CT1" or "CTi" that appear to cover OCR
// misreads (assumption). Matches go to redactNotAndRecommendByRegEx as
// CBI_address under rule 13.
rule "13: Redact addresses that start with BL or CTL"
    when
        Section(
            searchText.contains("BL")
            || searchText.contains("CT")
        )
    then
        section.redactNotAndRecommendByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address", 13, "Laboratory for vertebrate studies found");
end
// Non-vertebrate variant: sections containing "et al" are scanned for a
// capitalised name (optionally followed by initials) directly before "et al".
// Capture group 1 (the name) is passed to redactAndRecommendByRegEx as
// CBI_author, rule 14, Article 39(e)(3).
rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("et al")
        )
    then
        section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: sections containing "et al" are scanned for a
// capitalised name (optionally followed by initials) directly before "et al".
// Capture group 1 (the name) is passed to redactAndRecommendByRegEx as
// CBI_author, rule 15, Article 39(e)(2).
rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("et al")
        )
    then
        section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Vertebrate studies only: when the section text contains both "Species:" and
// "Source:", the line after "Source:" is recommended as CBI_address.
rule "16: Add recommendation for Addresses in Test Organism sections"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("Species:")
            && searchText.contains("Source:")
        )
    then
        section.recommendLineAfter("Source:", "CBI_address");
end
// Vertebrate studies only: same idea as rule 16 but with the colon-less
// keywords "Species" and "Source", so it also fires wherever rule 16 fires.
// The line after "Source" is recommended as CBI_address.
rule "17: Add recommendation for Addresses in Test Animals sections"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("Species")
            && searchText.contains("Source")
        )
    then
        section.recommendLineAfter("Source", "CBI_address");
end
// When the section matches type published_information, calls
// redactNotAndReference for CBI_author and CBI_address with
// published_information as the referenced type, both under rule 18.
rule "18: Do not redact Names and Addresses if Published Information found"
    when
        Section(matchesType("published_information"))
    then
        section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
        section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
end
// --------------------------------------- PII rules -------------------------------------------------------------------
// Non-vertebrate variant: sections matching type PII are redacted as rule 19,
// citing Article 39(e)(3).
rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("PII")
        )
    then
        section.redact("PII", 19, "PII (Personal Identification Information) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: sections matching type PII are redacted as rule 20,
// citing Article 39(e)(2).
rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("PII")
        )
    then
        section.redact("PII", 20, "PII (Personal Identification Information) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: sections containing "@" are scanned with an e-mail
// address pattern. Capture group 1 is redacted as PII, rule 21, Article 39(e)(3).
rule "21: Redact Emails by RegEx (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("@")
        )
    then
        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "PII (Personal Identification Information) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: sections containing "@" are scanned with an e-mail
// address pattern. Capture group 1 is redacted as PII, rule 22, Article 39(e)(2).
rule "22: Redact Emails by RegEx (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("@")
        )
    then
        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "PII (Personal Identification Information) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: sections containing "+" followed by at least two
// digits are scanned for international-style phone numbers (a "+" prefix is
// required by every alternative). Capture group 1 is redacted as PII,
// rule 23, Article 39(e)(3).
rule "23: Redact telephone numbers by RegEx (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && containsRegEx("[+]\\d{2,}", true)
        )
    then
        section.redactByRegEx("((([+]\\d{2,3} (\\d{7,12})\\b)|([+]\\d{2,3}(\\d{3,12})\\b|[+]\\d{2,3}([ -]\\(?\\d{2,6}\\)?){2,4})|[+]\\d{2,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", true, 1, "PII", 23, "PII (Personal Identification Information) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: sections containing "+" followed by at least two digits
// are scanned for international-style phone numbers (a "+" prefix is required
// by every alternative). Capture group 1 is redacted as PII,
// rule 24, Article 39(e)(2).
rule "24: Redact telephone numbers by RegEx (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && containsRegEx("[+]\\d{2,}", true)
        )
    then
        section.redactByRegEx("((([+]\\d{2,3} (\\d{7,12})\\b)|([+]\\d{2,3}(\\d{3,12})\\b|[+]\\d{2,3}([ -]\\(?\\d{2,6}\\)?){2,4})|[+]\\d{2,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", true, 1, "PII", 24, "PII (Personal Identification Information) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Applies regardless of the "Vertebrate Study" attribute. Sections containing
// a run of three or more digits, or "+" followed by two or more digits, are
// scanned with a broader phone pattern that also accepts numbers without a
// "+" prefix. Matches are only added as PII recommendations (no rule number
// or legal basis is passed).
rule "25: Recommend telephone numbers by RegEx"
    when
        Section(
            containsRegEx("\\d{3,}", true)
            || containsRegEx("[+]\\d{2,}", true)
        )
    then
        section.addRecommendationByRegEx("([+]\\d{2,3} (\\d{7,12})\\b|((([+]\\d{2,3}(\\d{3,12})\\b|[+]\\d{2,3}([ -]\\(?\\d{2,6}\\)?){2,4})|(\\(?\\b(\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b))", true, 1, "PII");
end
// Non-vertebrate variant: fires when the section text contains any of the
// contact keywords listed in the condition. The action then redacts, as PII
// under rule 26 / Article 39(e)(3), the line after each keyword plus the text
// between "No:" and "Fax" and between "Contact:" and "Tel.:".
// NOTE(review): "Contact:" is acted on below but is not itself one of the
// trigger keywords -- confirm this is intentional.
rule "26: Redact contact information (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (text.contains("Contact point:")
                || text.contains("Phone:")
                || text.contains("Fax:")
                || text.contains("Tel.:")
                || text.contains("Tel:")
                || text.contains("E-mail:")
                || text.contains("Email:")
                || text.contains("e-mail:")
                || text.contains("E-mail address:")
                || text.contains("Alternative contact:")
                || text.contains("Telephone number:")
                || text.contains("Telephone No:")
                || text.contains("Fax number:")
                || text.contains("Telephone:")
                || text.contains("Phone No.")
                || text.contains("European contact:"))
        )
    then
        section.redactLineAfter("Contact point:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: fires when the section text contains any of the contact
// keywords listed in the condition. The action then redacts, as PII under
// rule 27 / Article 39(e)(2), the line after each keyword plus the text
// between "No:" and "Fax" and between "Contact:" and "Tel.:".
// NOTE(review): "Contact:" is acted on below but is not itself one of the
// trigger keywords -- confirm this is intentional.
rule "27: Redact contact information (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (text.contains("Contact point:")
                || text.contains("Phone:")
                || text.contains("Fax:")
                || text.contains("Tel.:")
                || text.contains("Tel:")
                || text.contains("E-mail:")
                || text.contains("Email:")
                || text.contains("e-mail:")
                || text.contains("E-mail address:")
                || text.contains("Alternative contact:")
                || text.contains("Telephone number:")
                || text.contains("Telephone No:")
                || text.contains("Fax number:")
                || text.contains("Telephone:")
                || text.contains("Phone No.")
                || text.contains("European contact:"))
        )
    then
        section.redactLineAfter("Contact point:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: fires when the headline contains the word
// "applicant", "Primary contact" or "Alternative contact", or the text
// contains "Applicant" or "Telephone number:". It runs the same keyword list
// as rule 26, but records rule 28 and the reason
// "Applicant information was found" (Article 39(e)(3)).
rule "28: Redact contact information if applicant is found (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (headlineContainsWord("applicant")
                || text.contains("Applicant")
                || headlineContainsWord("Primary contact")
                || headlineContainsWord("Alternative contact")
                || text.contains("Telephone number:"))
        )
    then
        section.redactLineAfter("Contact point:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: fires when the headline contains the word "applicant",
// "Primary contact" or "Alternative contact", or the text contains
// "Applicant" or "Telephone number:". It runs the same keyword list as
// rule 27, but records rule 29 and the reason
// "Applicant information was found" (Article 39(e)(2)).
rule "29: Redact contact information if applicant is found (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (headlineContainsWord("applicant")
                || text.contains("Applicant")
                || headlineContainsWord("Primary contact")
                || headlineContainsWord("Alternative contact")
                || text.contains("Telephone number:"))
        )
    then
        section.redactLineAfter("Contact point:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: fires when the section text mentions a producer or
// manufacturer. The first two phrases are compared case-insensitively via
// toLowerCase(); the remaining three are case-sensitive. The action redacts
// the line after each contact keyword, and the text between "No:" and "Fax",
// as PII under rule 30 / Article 39(e)(3).
// The "Contact:" keyword used to be submitted twice with identical arguments
// (copy-paste duplicate); it is now submitted once.
// NOTE(review): this assumes a repeated redactLineAfter call adds nothing new.
rule "30: Redact contact information if Producer is found (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (text.toLowerCase().contains("producer of the plant protection")
                || text.toLowerCase().contains("producer of the active substance")
                || text.contains("Manufacturer of the active substance")
                || text.contains("Manufacturer:")
                || text.contains("Producer or producers of the active substance"))
        )
    then
        section.redactLineAfter("Contact:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: fires when the section text mentions a producer or
// manufacturer. The first two phrases are compared case-insensitively via
// toLowerCase(); the remaining three are case-sensitive. The action redacts
// the line after each contact keyword, and the text between "No:" and "Fax",
// as PII under rule 31 / Article 39(e)(2).
// The "Contact:" keyword used to be submitted twice with identical arguments
// (copy-paste duplicate); it is now submitted once.
// NOTE(review): this assumes a repeated redactLineAfter call adds nothing new.
rule "31: Redact contact information if Producer is found (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (text.toLowerCase().contains("producer of the plant protection")
                || text.toLowerCase().contains("producer of the active substance")
                || text.contains("Manufacturer of the active substance")
                || text.contains("Manufacturer:")
                || text.contains("Producer or producers of the active substance"))
        )
    then
        section.redactLineAfter("Contact:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: the section contains "AUTHOR(S):" and
// "COMPLETION DATE:" but not "STUDY COMPLETION DATE:" (that case is handled by
// rule 34). Redacts the lines between the two markers as PII, rule 32,
// Article 39(e)(3).
rule "32: Redact AUTHOR(S) (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("COMPLETION DATE:")
            && !searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 32, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: the section contains "AUTHOR(S):" and "COMPLETION DATE:"
// but not "STUDY COMPLETION DATE:" (that case is handled by rule 35).
// Redacts the lines between the two markers as PII, rule 33, Article 39(e)(2).
rule "33: Redact AUTHOR(S) (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("COMPLETION DATE:")
            && !searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 33, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: the section contains "AUTHOR(S):" and
// "STUDY COMPLETION DATE:". Redacts the lines between those two markers as
// PII, rule 34, Article 39(e)(3).
rule "34: Redact AUTHOR(S) (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 34, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Vertebrate variant: the section contains "AUTHOR(S):" and
// "STUDY COMPLETION DATE:". Redacts the lines between those two markers as
// PII, rule 35, Article 39(e)(2).
rule "35: Redact AUTHOR(S) (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 35, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Non-vertebrate variant: when "PERFORMING LABORATORY:" is present, the text
// between it and "LABORATORY PROJECT ID:" is first submitted as CBI_address
// (rule 36). A redactNot call for CBI_address follows immediately,
// presumably so the address is annotated but not redacted -- confirm.
rule "36: Redact PERFORMING LABORATORY (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("PERFORMING LABORATORY:")
        )
    then
        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 36, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactNot("CBI_address", 36, "Performing laboratory found for non vertebrate study");
end
// Vertebrate variant: when "PERFORMING LABORATORY:" is present, the text
// between it and "LABORATORY PROJECT ID:" is redacted as CBI_address,
// rule 37, Article 39(e)(2).
rule "37: Redact PERFORMING LABORATORY (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("PERFORMING LABORATORY:")
        )
    then
        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 37, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// --------------------------------------- other rules -------------------------------------------------------------------
// Rule 50 - purity statements become hint annotations.
// Guard: the lower-cased searchText of the section contains "purity".
// Action: calls section.addHintAnnotationByRegEx with a pattern for a "purity" label
// (optionally followed by " of", a parenthesised remark and a colon), a number and a
// percent sign, and with the type "hint_only".
// NOTE(review): the arguments (true, 1) presumably mean "case-insensitive" and "capture
// group 1" - confirm against Section.addHintAnnotationByRegEx.
// NOTE(review): toLowerCase() without an explicit Locale depends on the JVM default locale.
rule "50: Purity Hint"
    when
        Section(
            searchText.toLowerCase().contains("purity")
        )
    then
        section.addHintAnnotationByRegEx(
            "(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%",
            true,
            1,
            "hint_only");
end
// Rule 51 - ignore "dossier_redaction" entries.
// Guard: fileAttributeByLabelEqualsIgnoreCase("Confidentiality", "confidential") is false
// and the section matches the type "dossier_redaction".
// Action: calls section.ignore("dossier_redaction").
// NOTE(review): the rule name says "if confidential", but the guard is negated, so the rule
// fires for files that are NOT marked confidential. The name is kept as is because it
// identifies the rule; confirm whether the name or the condition reflects the intent.
rule "51: Ignore dossier_redaction entries if confidential"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Confidentiality", "confidential")
            && matchesType("dossier_redaction")
        )
    then
        section.ignore("dossier_redaction");
end
// Rule 52 - signature images when the file is not flagged as a vertebrate study.
// Guard: fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study", "Yes") is false and the
// section matches the image type "signature".
// Action: calls section.redactImage for "signature" with rule number 52, a reason text and
// the Article 39(e)(3) legal basis.
rule "52: Redact signatures (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study", "Yes")
            && matchesImageType("signature")
        )
    then
        section.redactImage(
            "signature", 52,
            "Signature found",
            "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// Rule 53 - signature images when the file is flagged as a vertebrate study.
// Guard: fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study", "Yes") is true and the
// section matches the image type "signature".
// Action: calls section.redactImage for "signature" with rule number 53, a reason text and
// the Article 39(e)(2) legal basis.
rule "53: Redact signatures (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study", "Yes")
            && matchesImageType("signature")
        )
    then
        section.redactImage(
            "signature", 53,
            "Signature found",
            "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
// Rule 54 - logo images when the file is flagged as a vertebrate study.
// Guard: fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study", "Yes") is true and the
// section matches the image type "logo".
// Action: calls section.redactImage for "logo" with rule number 54, a reason text and the
// Article 39(e)(2) legal basis.
rule "54: Redact Logos (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study", "Yes")
            && matchesImageType("logo")
        )
    then
        section.redactImage(
            "logo", 54,
            "Logo found",
            "Article 39(e)(2) of Regulation (EC) No 178/2002");
end

View File

@ -0,0 +1,185 @@
{
"types": [
{
"type": "CBI_address",
"hexColor": "#9398a0",
"recommendationHexColor": "#8df06c",
"rank": 140,
"hint": false,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": false,
"recommendation": false,
"description": "All site names and addresses, and location (e.g. Syngenta, Monthey, GPS Co-ordinates, Mr Smith of … providing the…). Except addresses in published literature and the applicant address.",
"addToDictionaryAction": true,
"label": "CBI Address",
"hasDictionary": true,
"systemManaged": false
},
{
"type": "CBI_author",
"hexColor": "#9398a0",
"recommendationHexColor": "#8df06c",
"rank": 130,
"hint": false,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": false,
"recommendation": false,
"description": "All authors named in the study documentation. Except names in published literature.",
"addToDictionaryAction": true,
"label": "CBI Author",
"hasDictionary": true,
"systemManaged": false
},
{
"type": "PII",
"hexColor": "#9398a0",
"recommendationHexColor": "#8df06c",
"rank": 150,
"hint": false,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": false,
"recommendation": false,
"description": "Not authors but listed in the document: Names, signatures, telephone, email etc.; e.g. Reg Manager, QA Manager",
"addToDictionaryAction": true,
"label": "PII",
"hasDictionary": true,
"systemManaged": false
},
{
"type": "formula",
"hexColor": "#036ffc",
"recommendationHexColor": "#8df06c",
"rank": 1002,
"hint": true,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": true,
"recommendation": false,
"description": "Empty dictionary used to configure formula colors.",
"addToDictionaryAction": false,
"label": "Formula",
"hasDictionary": false,
"systemManaged": true
},
{
"type": "hint_only",
"hexColor": "#fa98f7",
"recommendationHexColor": "#8df06c",
"rank": 50,
"hint": true,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": true,
"recommendation": false,
"description": "Entries of this dictionary will be highlighted only",
"addToDictionaryAction": false,
"label": "Hint Only",
"hasDictionary": true,
"systemManaged": false
},
{
"type": "image",
"hexColor": "#bdd6ff",
"recommendationHexColor": "#8df06c",
"rank": 999,
"hint": true,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": true,
"recommendation": false,
"description": "Empty dictionary used to configure image colors.",
"addToDictionaryAction": false,
"label": "Image",
"hasDictionary": false,
"systemManaged": true
},
{
"type": "logo",
"hexColor": "#9398a0",
"recommendationHexColor": "#8df06c",
"rank": 1001,
"hint": false,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": true,
"recommendation": false,
"description": "Empty dictionary used to configure logo colors.",
"addToDictionaryAction": false,
"label": "Logo",
"hasDictionary": false,
"systemManaged": true
},
{
"type": "must_redact",
"hexColor": "#9398a0",
"recommendationHexColor": "#8df06c",
"rank": 100,
"hint": false,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": true,
"recommendation": false,
"description": "Entries of this dictionary get redacted wherever found.",
"addToDictionaryAction": false,
"label": "Must Redact",
"hasDictionary": true,
"systemManaged": false
},
{
"type": "ocr",
"hexColor": "#bdd6ff",
"recommendationHexColor": "#8df06c",
"rank": 1000,
"hint": true,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": true,
"recommendation": false,
"description": "Empty dictionary used to configure ocr colors.",
"addToDictionaryAction": false,
"label": "Ocr",
"hasDictionary": false,
"systemManaged": true
},
{
"type": "published_information",
"hexColor": "#85ebff",
"recommendationHexColor": "#8df06c",
"rank": 70,
"hint": true,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": false,
"recommendation": false,
"description": "Manual managed list of public journals and papers that need no redaction",
"addToDictionaryAction": true,
"label": "Published Information",
"hasDictionary": true,
"systemManaged": false
},
{
"type": "signature",
"hexColor": "#9398a0",
"recommendationHexColor": "#8df06c",
"rank": 1003,
"hint": false,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": true,
"recommendation": false,
"description": "Empty dictionary used to configure signature colors.",
"addToDictionaryAction": false,
"label": "Signature",
"hasDictionary": false,
"systemManaged": true
},
{
"type": "imported_redaction",
"hexColor": "#f0f0c0",
"recommendationHexColor": "#8df06c",
"rank": 9999,
"hint": false,
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
"caseInsensitive": false,
"recommendation": false,
"description": "Redaction Annotations that were imported from documents",
"addToDictionaryAction": false,
"label": "Imported Redaction",
"hasDictionary": false,
"systemManaged": true,
"autoHideSkipped": true
}
]
}