RED-3992 - Replaced naive approach with Aho-Corasick string search. Cleaned up code. Reduced amount of unnecessary conversions/invocations
This commit is contained in:
parent
cfd9d7665b
commit
36967cab2c
@ -5,7 +5,7 @@
|
||||
<parent>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<artifactId>platform-docker-dependency</artifactId>
|
||||
<version>1.1.0</version>
|
||||
<version>1.2.0</version>
|
||||
<relativePath />
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
@ -42,7 +42,7 @@
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
@ -95,4 +95,4 @@
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
</project>
|
||||
</project>
|
||||
|
||||
@ -24,6 +24,19 @@
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>storage-commons</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.ahocorasick</groupId>
|
||||
<artifactId>ahocorasick</artifactId>
|
||||
<version>0.6.3</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jol</groupId>
|
||||
<artifactId>jol-core</artifactId>
|
||||
<version>0.10</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
|
||||
@ -54,16 +54,6 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
public boolean containsValue(String type, String value) {
|
||||
|
||||
return localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||
.getValues(false)
|
||||
.contains(value) || localAccessMap.containsKey(type) && localAccessMap.get(type)
|
||||
.getValues(true)
|
||||
.contains(value);
|
||||
}
|
||||
|
||||
|
||||
public boolean isHint(String type) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
|
||||
@ -23,4 +23,4 @@ public class DictionaryEntries {
|
||||
@Builder.Default
|
||||
Set<DictionaryEntry> falseRecommendations = new HashSet<>();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,8 +4,10 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import org.ahocorasick.trie.Trie;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@ -14,30 +16,101 @@ import java.util.stream.Collectors;
|
||||
@AllArgsConstructor
|
||||
public class DictionaryModel implements Serializable {
|
||||
|
||||
private String type;
|
||||
private int rank;
|
||||
private float[] color;
|
||||
private boolean caseInsensitive;
|
||||
private boolean hint;
|
||||
private Set<DictionaryEntry> entries;
|
||||
private Set<DictionaryEntry> falsePositives;
|
||||
private Set<DictionaryEntry> falseRecommendations;
|
||||
private Set<String> localEntries;
|
||||
private boolean isDossierDictionary;
|
||||
private final String type;
|
||||
private final int rank;
|
||||
private final float[] color;
|
||||
private final boolean caseInsensitive;
|
||||
private final boolean hint;
|
||||
private final boolean isDossierDictionary;
|
||||
private final Set<DictionaryEntry> entries;
|
||||
private final Set<DictionaryEntry> falsePositives;
|
||||
private final Set<DictionaryEntry> falseRecommendations;
|
||||
|
||||
private transient Trie entriesTrie;
|
||||
private transient Trie falsePositivesTrie;
|
||||
private transient Trie falseRecommendationsTrie;
|
||||
|
||||
private transient Trie localEntriesTrie;
|
||||
|
||||
private final Set<String> localEntries = new HashSet<>();
|
||||
|
||||
public DictionaryModel(String type,
|
||||
int rank,
|
||||
float[] color,
|
||||
boolean caseInsensitive,
|
||||
boolean hint,
|
||||
Set<DictionaryEntry> entries,
|
||||
Set<DictionaryEntry> falsePositives,
|
||||
Set<DictionaryEntry> falseRecommendations,
|
||||
boolean isDossierDictionary) {
|
||||
this.type = type;
|
||||
this.rank = rank;
|
||||
this.color = color;
|
||||
this.caseInsensitive = caseInsensitive;
|
||||
this.hint = hint;
|
||||
this.isDossierDictionary = isDossierDictionary;
|
||||
|
||||
this.entries = entries;
|
||||
this.falsePositives = falsePositives;
|
||||
this.falseRecommendations = falseRecommendations;
|
||||
|
||||
this.entriesTrie = buildTrie(entries);
|
||||
this.falsePositivesTrie = buildTrie(falsePositives);
|
||||
this.falseRecommendationsTrie = buildTrie(falseRecommendations);
|
||||
|
||||
public Set<String> getValues(boolean local) {
|
||||
return local ? localEntries : entries.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors
|
||||
.toSet());
|
||||
}
|
||||
|
||||
public Set<String> getFalsePositiveValues() {
|
||||
return falsePositives.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors
|
||||
.toSet());
|
||||
|
||||
public Trie getLocalEntriesTrie() {
|
||||
if (localEntriesTrie == null) {
|
||||
this.localEntriesTrie = buildTrieFromStrings(this.localEntries);
|
||||
}
|
||||
return localEntriesTrie;
|
||||
}
|
||||
|
||||
public Set<String> getFalseRecommendationValues() {
|
||||
return falseRecommendations.stream().filter(e -> !e.isDeleted()).map(e -> e.getValue()).collect(Collectors
|
||||
.toSet());
|
||||
|
||||
public Trie getEntriesTrie() {
|
||||
if (entriesTrie == null) {
|
||||
this.entriesTrie = buildTrie(this.entries);
|
||||
}
|
||||
return entriesTrie;
|
||||
}
|
||||
|
||||
public Trie getFalsePositivesTrie() {
|
||||
if (falsePositivesTrie == null) {
|
||||
this.falsePositivesTrie = buildTrie(this.falsePositives);
|
||||
}
|
||||
return falsePositivesTrie;
|
||||
}
|
||||
|
||||
public Trie getFalseRecommendationsTrie() {
|
||||
if (falsePositivesTrie == null) {
|
||||
this.falsePositivesTrie = buildTrie(this.falseRecommendations);
|
||||
}
|
||||
return falsePositivesTrie;
|
||||
}
|
||||
|
||||
private Trie buildTrieFromStrings(Set<String> entries) {
|
||||
var builder = Trie.builder()
|
||||
.addKeywords(entries);
|
||||
|
||||
if (this.isCaseInsensitive()) {
|
||||
builder.ignoreCase();
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private Trie buildTrie(Set<DictionaryEntry> values) {
|
||||
var builder = Trie.builder()
|
||||
.addKeywords(values.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()));
|
||||
|
||||
if (this.isCaseInsensitive()) {
|
||||
builder.ignoreCase();
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
@ -11,6 +12,9 @@ import java.util.regex.Pattern;
|
||||
|
||||
public class SearchableText {
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
private transient String stringRepresentation;
|
||||
private final List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
|
||||
|
||||
@ -183,10 +187,16 @@ public class SearchableText {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return buildString(sequences);
|
||||
}
|
||||
|
||||
public String asString() {
|
||||
if (stringRepresentation == null) {
|
||||
stringRepresentation = buildString(sequences);
|
||||
}
|
||||
return stringRepresentation;
|
||||
}
|
||||
|
||||
|
||||
public String buildString(List<TextPositionSequence> sequences) {
|
||||
|
||||
|
||||
@ -1,34 +1,26 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.ArgumentType;
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.model.FileAttribute;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Data
|
||||
@Slf4j
|
||||
@ -69,11 +61,13 @@ public class Section {
|
||||
private List<FileAttribute> fileAttributes = new ArrayList<>();
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public void addAiEntities(String type, String asType) {
|
||||
|
||||
Set<Entity> entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet());
|
||||
Set<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toSet());
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(searchText, values, dictionary.getType(asType), headline, sectionNumber, false, false, Engine.NER, true, true);
|
||||
List<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toList());
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(searchText, values, dictionary.getType(asType), new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
|
||||
EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
|
||||
|
||||
Set<Entity> finalResult = new HashSet<>();
|
||||
@ -98,7 +92,8 @@ public class Section {
|
||||
nerEntities.removeAll(entitiesOfType);
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public void combineAiTypes(String startType, String combineTypes, int maxDistanceBetween, String asType, int minPartMatches, boolean allowDuplicateTypes) {
|
||||
|
||||
Set<String> combineSet = Set.of(combineTypes.split(","));
|
||||
@ -160,35 +155,35 @@ public class Section {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
@ -196,14 +191,14 @@ public class Section {
|
||||
.anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean hasTableHeader(@Argument(ArgumentType.STRING) String headerName) {
|
||||
|
||||
@ -211,37 +206,37 @@ public class Section {
|
||||
return tabularData != null && tabularData.containsKey(cleanHeaderName);
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean aiMatchesType(@Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
return nerEntities.stream().anyMatch(entity -> !entity.isIgnored() && entity.getType().equals(type));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean matchesType(@Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
return entities.stream().anyMatch(entity -> !entity.isIgnored() && entity.getType().equals(type));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean matchesImageType(@Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
return images.stream().anyMatch(image -> !image.isIgnored() && image.getType().equals(type));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean headlineContainsWord(@Argument(ArgumentType.STRING) String word) {
|
||||
|
||||
return StringUtils.containsIgnoreCase(headline, word);
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive){
|
||||
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive) {
|
||||
|
||||
var compiledPattern = Patterns.getCompiledPattern(regEx, patternCaseInsensitive);
|
||||
|
||||
@ -250,7 +245,7 @@ public class Section {
|
||||
return matcher.find();
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
@ -259,18 +254,20 @@ public class Section {
|
||||
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName).toString().equals(value);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@ThenAction
|
||||
public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
|
||||
|
||||
expandByPrefixRegEx(type, prefixPattern, patternCaseInsensitive, group, null);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void expandByPrefixRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String prefixPattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.REGEX) String valuePattern) {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.REGEX) String valuePattern) {
|
||||
|
||||
if (StringUtils.isEmpty(prefixPattern)) return;
|
||||
|
||||
@ -315,6 +312,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String suffixPattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
|
||||
|
||||
@ -323,6 +321,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REGEX) String suffixPattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.REGEX) String valuePattern) {
|
||||
@ -370,6 +369,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactImage(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
@ -378,6 +378,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotImage(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redactImage(type, ruleNumber, reason, null, false);
|
||||
@ -385,6 +386,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redact(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
@ -393,6 +395,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNot(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redact(type, ruleNumber, reason, null, false);
|
||||
@ -400,6 +403,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -409,6 +413,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
@ -418,6 +423,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -427,6 +433,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
@ -436,6 +443,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -445,6 +453,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
@ -454,6 +463,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactLinesBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -463,6 +473,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotLinesBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
@ -472,6 +483,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations, @Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -481,6 +493,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
@ -489,6 +502,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactAndRecommendByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -498,6 +512,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotAndRecommendByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
|
||||
@ -507,6 +522,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void addRecommendationByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
@ -524,6 +540,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotAndReference(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REFERENCE_TYPE) String referenceType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
@ -542,6 +559,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactIfPrecededBy(@Argument(ArgumentType.STRING) String prefix, @Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
@ -557,6 +575,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void addRedaction(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
@ -564,13 +583,15 @@ public class Section {
|
||||
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void ignore(String type) {
|
||||
|
||||
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.ENTITY));
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void ignoreRecommendations(String type) {
|
||||
|
||||
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.RECOMMENDATION));
|
||||
@ -578,8 +599,9 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void expandToFalsePositiveByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.STRING) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -607,6 +629,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void addHintAnnotationByRegEx(@Argument(ArgumentType.REGEX) String pattern, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
@ -625,6 +648,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE, false);
|
||||
@ -633,6 +657,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, "\n");
|
||||
@ -657,6 +682,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
annotateCell(cellHeader, ruleNumber, type, false, false, null, null);
|
||||
@ -684,8 +710,8 @@ public class Section {
|
||||
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
|
||||
String searchValue = caseInsensitive ? value.toLowerCase() : value;
|
||||
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(text, Set.of(searchValue), dictionary.getType(asType), headline, sectionNumber, false, false, engine, false, asRecommendation);
|
||||
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(text, List.of(searchValue), dictionary.getType(asType),
|
||||
new FindEntityDetails(asType, headline, sectionNumber, false, false, engine, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
found.forEach(entity -> {
|
||||
if (redacted) {
|
||||
entity.setRedaction(true);
|
||||
|
||||
@ -129,6 +129,10 @@ public class DictionaryService {
|
||||
var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
|
||||
|
||||
oldModel.ifPresent(oldDictionaryModel -> {
|
||||
|
||||
});
|
||||
// add old entries from existing DictionaryModel
|
||||
oldModel.ifPresent(dictionaryModel -> entries.addAll(dictionaryModel.getEntries().stream().filter(
|
||||
f -> !newValues.contains(f.getValue())).collect(Collectors.toList())
|
||||
@ -146,7 +150,7 @@ public class DictionaryService {
|
||||
falseRecommendations.addAll(newEntries.getFalseRecommendations());
|
||||
|
||||
return new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t
|
||||
.isHint(), entries, falsePositives, falseRecommendations, new HashSet<>(), dossierId != null);
|
||||
.isHint(), entries, falsePositives, falseRecommendations, dossierId != null);
|
||||
})
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList());
|
||||
@ -193,6 +197,7 @@ public class DictionaryService {
|
||||
}
|
||||
|
||||
|
||||
|
||||
private float[] convertColor(String hex) {
|
||||
|
||||
Color color = Color.decode(hex);
|
||||
|
||||
@ -1,18 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
@ -21,22 +8,20 @@ import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@ -86,6 +71,7 @@ public class EntityRedactionService {
|
||||
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary);
|
||||
}
|
||||
|
||||
|
||||
if (!local && analyzeRequest.getManualRedactions() != null) {
|
||||
|
||||
var approvedForceRedactions = analyzeRequest.getManualRedactions().getForceRedactions().stream()
|
||||
@ -124,6 +110,7 @@ public class EntityRedactionService {
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
@ -142,6 +129,7 @@ public class EntityRedactionService {
|
||||
.images(reanalysisSection.getImages())
|
||||
.fileAttributes(analyzeRequest.getFileAttributes())
|
||||
.build(), reanalysisSection.getSearchableText()));
|
||||
|
||||
}
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
@ -200,18 +188,18 @@ public class EntityRedactionService {
|
||||
private void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) {
|
||||
|
||||
analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {
|
||||
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
|
||||
|
||||
if (dictionary.getLocalAccessMap().get(key) == null) {
|
||||
log.warn("Dictionary {} is null", key);
|
||||
}
|
||||
if (dictionary.getLocalAccessMap().get(key) == null) {
|
||||
log.warn("Dictionary {} is null", key);
|
||||
}
|
||||
|
||||
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
|
||||
log.warn("Dictionary {} localEntries is null", key);
|
||||
}
|
||||
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
|
||||
log.warn("Dictionary {} localEntries is null", key);
|
||||
}
|
||||
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
});
|
||||
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@ -221,18 +209,20 @@ public class EntityRedactionService {
|
||||
List<Integer> cellStarts) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.toString();
|
||||
String searchableString = searchableText.asString();
|
||||
|
||||
if (StringUtils.isEmpty(searchableString)) {
|
||||
return new Entities(new HashSet<>(), new HashSet<>());
|
||||
}
|
||||
|
||||
String lowercaseInputString = searchableString.toLowerCase();
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
if (model.isCaseInsensitive()) {
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(lowercaseInputString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false));
|
||||
} else {
|
||||
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(searchableString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false));
|
||||
}
|
||||
|
||||
var trie = local ? model.getLocalEntriesTrie() : model.getEntriesTrie();
|
||||
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
|
||||
trie, model, new FindEntityDetails(model.getType(),headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, local? EntityType.RECOMMENDATION: EntityType.ENTITY));
|
||||
|
||||
EntitySearchUtils.addOrAddEngine(found, entities);
|
||||
}
|
||||
|
||||
Set<Entity> nerFound = new HashSet<>();
|
||||
|
||||
@ -3,7 +3,9 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@ -91,7 +93,7 @@ public class ManualRedactionSurroundingTextService {
|
||||
private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
|
||||
List<Rectangle> toFindPositions) {
|
||||
|
||||
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false, EntityType.ENTITY);
|
||||
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), value,new FindEntityDetails( "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, EntityType.ENTITY));
|
||||
Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null);
|
||||
|
||||
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
|
||||
|
||||
@ -175,7 +175,7 @@ public class SectionTextBuilderService {
|
||||
sectionText.getSectionAreas().add(sectionArea);
|
||||
}
|
||||
|
||||
sectionText.setText(searchableText.toString());
|
||||
sectionText.setText(searchableText.asString());
|
||||
sectionText.setHeadline(headline);
|
||||
sectionText.setSectionNumber(sectionNumber.intValue());
|
||||
sectionText.setTable(false);
|
||||
|
||||
@ -31,7 +31,7 @@ public class SurroundingWordsService {
|
||||
if (dictionary != null && dictionary.isHint(entity.getType())) {
|
||||
continue;
|
||||
}
|
||||
findSurroundingWords(entity, searchableText.toString(), entity.getStart(), entity.getEnd());
|
||||
findSurroundingWords(entity, searchableText.asString(), entity.getStart(), entity.getEnd());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Could not get surrounding text!");
|
||||
@ -47,7 +47,7 @@ public class SurroundingWordsService {
|
||||
}
|
||||
|
||||
try {
|
||||
String searchableString = searchableText.toString();
|
||||
String searchableString = searchableText.asString();
|
||||
|
||||
if (cellstarts != null) {
|
||||
for (int i = 0; i < cellstarts.size(); i++) {
|
||||
|
||||
@ -1,108 +1,95 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ahocorasick.trie.Trie;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
@SuppressWarnings("PMD")
|
||||
public class EntitySearchUtils {
|
||||
|
||||
|
||||
public boolean sectionContainsAny(String sectionText, Set<DictionaryIncrementValue> values) {
|
||||
var trie = Trie.builder().ignoreCase().addKeywords(values.stream().map(DictionaryIncrementValue::getValue).collect(Collectors.toList())).build();
|
||||
return trie.containsMatch(sectionText.toLowerCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
String inputString = sectionText.toLowerCase(Locale.ROOT);
|
||||
public Set<Entity> findEntities(String inputString, List<String> values, DictionaryModel type, FindEntityDetails details) {
|
||||
|
||||
for (DictionaryIncrementValue value : values) {
|
||||
var builder = Trie.builder()
|
||||
.addKeywords(values);
|
||||
|
||||
String cleanValue = value.getValue().toLowerCase(Locale.ROOT).trim();
|
||||
|
||||
if (cleanValue.length() <= 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = inputString.indexOf(cleanValue, stopIndex);
|
||||
stopIndex = startIndex + cleanValue.length();
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
return true;
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
if (type.isCaseInsensitive()) {
|
||||
builder.ignoreCase();
|
||||
}
|
||||
return false;
|
||||
|
||||
return findEntities(inputString, builder.build(), type, details);
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> findEntities(String inputString, Set<String> values, DictionaryModel type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
|
||||
Engine engine, boolean ignoreMinLength, boolean asRecommendation) {
|
||||
public Set<Entity> findEntities(String inputString, Trie trie, DictionaryModel type, FindEntityDetails details) {
|
||||
|
||||
Set<Entity> found = find(inputString, values, type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY);
|
||||
Set<Entity> found = find(inputString, trie, details);
|
||||
|
||||
if(asRecommendation){
|
||||
Set<Entity> falseRecommendations = find(inputString, type.getFalseRecommendationValues(), type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, EntityType.FALSE_RECOMMENDATION);
|
||||
removeFalsePositives(found, falseRecommendations);
|
||||
found.addAll(falseRecommendations);
|
||||
} else {
|
||||
Set<Entity> falsePositives = find(inputString, type.getFalsePositiveValues(), type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, EntityType.FALSE_POSITIVE);
|
||||
removeFalsePositives(found, falsePositives);
|
||||
found.addAll(falsePositives);
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
|
||||
Engine engine, boolean ignoreMinLength, EntityType entityType) {
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
for (String value : values) {
|
||||
|
||||
String cleanValue = value.trim();
|
||||
|
||||
if (!ignoreMinLength && cleanValue.length() <= 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = inputString.indexOf(cleanValue, stopIndex);
|
||||
stopIndex = startIndex + cleanValue.length();
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, entityType));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
if (details.getEntityType() == EntityType.RECOMMENDATION) {
|
||||
Set<Entity> falseRecommendations = find(inputString, type.getFalseRecommendationsTrie(), details.withEntityType(EntityType.FALSE_RECOMMENDATION));
|
||||
removeFalsePositives(found, falseRecommendations);
|
||||
found.addAll(falseRecommendations);
|
||||
} else {
|
||||
Set<Entity> falsePositives = find(inputString, type.getFalsePositivesTrie(), details.withEntityType(EntityType.FALSE_POSITIVE));
|
||||
removeFalsePositives(found, falsePositives);
|
||||
found.addAll(falsePositives);
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
public Set<Entity> find(String inputString, String value, FindEntityDetails findEntityDetails) {
|
||||
|
||||
var trie = Trie.builder()
|
||||
.addKeywords(value).build();
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
|
||||
trie.parseText(inputString).forEach(found -> {
|
||||
var startIndex = found.getStart();
|
||||
var stopIndex = found.getEnd() + 1;
|
||||
validateAndAddEntity(entities, findEntityDetails, inputString, startIndex, stopIndex);
|
||||
|
||||
});
|
||||
return entities;
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> find(String inputString, Trie trie, FindEntityDetails findEntityDetails) {
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
|
||||
var matches = trie.parseText(inputString);
|
||||
|
||||
matches.forEach(match -> {
|
||||
var startIndex = match.getStart();
|
||||
var stopIndex = match.getEnd() + 1;
|
||||
validateAndAddEntity(entities, findEntityDetails, inputString, startIndex, stopIndex);
|
||||
});
|
||||
|
||||
return entities;
|
||||
}
|
||||
|
||||
private void validateAndAddEntity(Set<Entity> entities, FindEntityDetails findEntityDetails, String inputString, int startIndex, int stopIndex) {
|
||||
|
||||
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
|
||||
entities.add(new Entity(inputString.substring(startIndex, stopIndex), findEntityDetails.getType(), startIndex, stopIndex,
|
||||
findEntityDetails.getHeadline(), findEntityDetails.getSectionNumber(), findEntityDetails.isDictionaryEntry(),
|
||||
findEntityDetails.isDossierDictionary(), findEntityDetails.getEngine(), findEntityDetails.getEntityType()));
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isSeparator(char c) {
|
||||
|
||||
@ -140,7 +127,6 @@ public class EntitySearchUtils {
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void removeFalsePositives(Set<Entity> entities, Set<Entity> falsePositives) {
|
||||
|
||||
List<Entity> wordsToRemove = new ArrayList<>();
|
||||
@ -148,17 +134,15 @@ public class EntitySearchUtils {
|
||||
for (Entity inner : entities) {
|
||||
if (inner.getWord().length() < word.getWord()
|
||||
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
|
||||
wordsToRemove.add(inner);
|
||||
wordsToRemove.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
entities.removeAll(wordsToRemove);
|
||||
wordsToRemove.forEach(entities::remove);
|
||||
entities.removeAll(falsePositives);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
|
||||
|
||||
List<Entity> wordsToRemove = new ArrayList<>();
|
||||
@ -216,17 +200,17 @@ public class EntitySearchUtils {
|
||||
|
||||
if (entities.contains(found)) {
|
||||
Optional<Entity> existingOptional = entities.stream().filter(entity -> entity.equals(found)).findFirst();
|
||||
if (!existingOptional.isPresent()) {
|
||||
if (existingOptional.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
var existing = existingOptional.get();
|
||||
|
||||
if (existing.getType().equals(found.getType())) {
|
||||
existing.getEngines().addAll(found.getEngines());
|
||||
if(existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY)
|
||||
|| existing.getEntityType().equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)){
|
||||
if (existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY)
|
||||
|| existing.getEntityType().equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)) {
|
||||
existing.setEntityType(EntityType.ENTITY);
|
||||
if(found.isRedaction()){
|
||||
if (found.isRedaction()) {
|
||||
existing.setRedaction(true);
|
||||
}
|
||||
}
|
||||
@ -282,7 +266,7 @@ public class EntitySearchUtils {
|
||||
for (Entity toAdd : toBeAdded) {
|
||||
if (existing.contains(toAdd)) {
|
||||
Optional<Entity> existingOptional = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst();
|
||||
if (!existingOptional.isPresent()) {
|
||||
if (existingOptional.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
var existingEntity = existingOptional.get();
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class FindEntityDetails {
|
||||
|
||||
private String type;
|
||||
private String headline;
|
||||
private int sectionNumber;
|
||||
private boolean isDictionaryEntry;
|
||||
private boolean isDossierDictionary;
|
||||
private Engine engine;
|
||||
private EntityType entityType;
|
||||
|
||||
public FindEntityDetails withEntityType(EntityType entityType) {
|
||||
this.entityType = entityType;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
@ -44,6 +44,12 @@ public class RedactionStorageService {
|
||||
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), objectMapper.writeValueAsBytes(any));
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public void storeObject(String dossierId, String fileId, FileType fileType, InputStream inputStream){
|
||||
|
||||
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), inputStream);
|
||||
}
|
||||
|
||||
|
||||
public ImportedRedactions getImportedRedactions(String dossierId, String fileId) {
|
||||
|
||||
|
||||
@ -10,15 +10,14 @@ import org.springframework.core.io.InputStreamResource;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
|
||||
public class FileSystemBackedStorageService extends S3StorageService {
|
||||
public class FileSystemBackedStorageService implements StorageService{
|
||||
|
||||
private final Map<String, File> dataMap = new HashMap<>();
|
||||
|
||||
public FileSystemBackedStorageService() {
|
||||
super(null, null);
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
@ -33,16 +32,45 @@ public class FileSystemBackedStorageService extends S3StorageService {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteObject(String objectId) {
|
||||
dataMap.remove(objectId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean objectExists(String objectId) {
|
||||
return dataMap.containsKey(objectId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
|
||||
}
|
||||
|
||||
public List<String> listPaths(){
|
||||
return new ArrayList<>(dataMap.keySet());
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
@Override
|
||||
public void storeObject(String objectId, byte[] data) {
|
||||
File tempFile = File.createTempFile("test", ".tmp");
|
||||
File tempFile = File.createTempFile("storage", objectId.replace("/","-"));
|
||||
|
||||
IOUtils.write(data, new FileOutputStream(tempFile));
|
||||
|
||||
dataMap.put(objectId, tempFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public void storeObject(String objectId, InputStream stream) {
|
||||
File tempFile = File.createTempFile("test", ".tmp");
|
||||
|
||||
IOUtils.copy(stream, new FileOutputStream(tempFile));
|
||||
|
||||
dataMap.put(objectId, tempFile);
|
||||
}
|
||||
|
||||
public void clearStorage() {
|
||||
this.dataMap.forEach((k, v) -> {
|
||||
v.delete();
|
||||
|
||||
@ -0,0 +1,95 @@
|
||||
package com.iqser.red.service.redaction.v1.server.realdata;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.MessageType;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.junit.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
public class AnalyseFileRealDataIntegrationTest extends LiveDataIntegrationTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testFile() {
|
||||
|
||||
ObjectMapper om = new ObjectMapper();
|
||||
om.registerModule(new JavaTimeModule());
|
||||
|
||||
var file = new ClassPathResource(BASE_DIR + "data/test-file.pdf").getInputStream();
|
||||
var nerData = new ClassPathResource(BASE_DIR + "data/test-file.ner.json").getInputStream();
|
||||
var text = new ClassPathResource(BASE_DIR + "data/test-file.text.json").getInputStream();
|
||||
var sectionText = new ClassPathResource(BASE_DIR + "data/test-file.section-grid.json").getInputStream();
|
||||
var redactionLog = new ClassPathResource(BASE_DIR + "data/test-file.redaction-log.json").getInputStream();
|
||||
|
||||
redactionStorageService.storeObject("dossierId", "fileId", FileType.ORIGIN, file);
|
||||
redactionStorageService.storeObject("dossierId", "fileId", FileType.NER_ENTITIES, nerData);
|
||||
redactionStorageService.storeObject("dossierId", "fileId", FileType.TEXT, text);
|
||||
redactionStorageService.storeObject("dossierId", "fileId", FileType.SECTION_GRID, sectionText);
|
||||
redactionStorageService.storeObject("dossierId", "fileId", FileType.REDACTION_LOG, redactionLog);
|
||||
|
||||
AnalyzeRequest ar = AnalyzeRequest.builder()
|
||||
.fileId("fileId")
|
||||
.dossierId("dossierId")
|
||||
.analysisNumber(1)
|
||||
.dossierTemplateId("dossierTemplateId")
|
||||
.lastProcessed(OffsetDateTime.now())
|
||||
.excludedPages(Set.of())
|
||||
.fileAttributes(List.of())
|
||||
.messageType(MessageType.STRUCTURE_ANALYSE)
|
||||
.build();
|
||||
|
||||
String in = om.writeValueAsString(ar);
|
||||
// redactionMessageReceiver.receiveAnalyzeRequest(in, false);
|
||||
// log.warn("done structure");
|
||||
|
||||
var txt = redactionStorageService.getText("dossierId", "fileId");
|
||||
|
||||
var totalText = txt.getSectionTexts().stream().map(SectionText::getText).collect(Collectors.joining("\n"));
|
||||
System.out.println(totalText.length());
|
||||
|
||||
|
||||
|
||||
|
||||
// ar.setMessageType(MessageType.ANALYSE);
|
||||
// in = om.writeValueAsString(ar);
|
||||
// redactionMessageReceiver.receiveAnalyzeRequest(in, false);
|
||||
// log.warn("done analyze");
|
||||
|
||||
|
||||
simulateIncrement(List.of("study"),"PII",3L);
|
||||
|
||||
ar.setMessageType(MessageType.REANALYSE);
|
||||
in = om.writeValueAsString(ar);
|
||||
redactionMessageReceiver.receiveAnalyzeRequest(in, false);
|
||||
log.warn("done analyze");
|
||||
|
||||
var log = redactionStorageService.getRedactionLog("dossierId", "fileId");
|
||||
om.writeValue(new FileOutputStream("/tmp/test-file.redaction-log.json"), log);
|
||||
|
||||
System.out.println(log.getRedactionLogEntry().size());
|
||||
}
|
||||
|
||||
|
||||
// public static long hash(char[]){
|
||||
// return ((value % prime) + prime) % prime;
|
||||
// }
|
||||
// public static long getBiggerPrime() {
|
||||
// BigInteger prime = BigInteger.probablePrime(getNumberOfBits(10) + 1, new Random());
|
||||
// return prime.longValue();
|
||||
// }
|
||||
// private static int getNumberOfBits(int number) {
|
||||
// return Integer.SIZE - Integer.numberOfLeadingZeros(number);
|
||||
// }
|
||||
}
|
||||
@ -0,0 +1,192 @@
|
||||
package com.iqser.red.service.redaction.v1.server.realdata;
|
||||
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
||||
import com.iqser.red.service.redaction.v1.server.Application;
|
||||
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.*;
|
||||
import com.iqser.red.service.redaction.v1.server.queue.RedactionMessageReceiver;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import lombok.SneakyThrows;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.core.io.support.ResourcePatternResolver;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.*;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(LiveDataIntegrationTest.RedactionIntegrationTestConfiguration.class)
|
||||
public class LiveDataIntegrationTest {
|
||||
|
||||
protected static String BASE_DIR = "performance/";
|
||||
|
||||
protected static String EFSA_SANITISATION_GFL_V1 = "dictionaries/EFSA_sanitisation_GFL_v1/";
|
||||
|
||||
@MockBean
|
||||
protected DictionaryClient dictionaryClient;
|
||||
|
||||
@MockBean
|
||||
protected EntityRecognitionClient entityRecognitionClient;
|
||||
|
||||
@MockBean
|
||||
private FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
|
||||
|
||||
@MockBean
|
||||
private LegalBasisClient legalBasisClient;
|
||||
|
||||
@Autowired
|
||||
private ResourcePatternResolver resourcePatternResolver;
|
||||
|
||||
@MockBean
|
||||
private RulesClient rulesClient;
|
||||
|
||||
@Autowired
|
||||
protected DictionaryService dictionaryService;
|
||||
|
||||
@Autowired
|
||||
protected RedactionStorageService redactionStorageService;
|
||||
|
||||
@Autowired
|
||||
protected RedactionMessageReceiver redactionMessageReceiver;
|
||||
|
||||
@Autowired
|
||||
protected FileSystemBackedStorageService fileSystemBackedStorageService;
|
||||
|
||||
private List<Type> types;
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class, StorageAutoConfiguration.class})
|
||||
public static class RedactionIntegrationTestConfiguration {
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@Before
|
||||
public void prepareTest() {
|
||||
|
||||
when(dictionaryClient.getVersion(anyString())).thenReturn(1L);
|
||||
when(dictionaryClient.getVersionForDossier(anyString())).thenReturn(1L);
|
||||
|
||||
var rules = IOUtils.toString(new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "rules.drl").getInputStream());
|
||||
when(rulesClient.getRules(any())).thenReturn(JSONPrimitive.of(rules));
|
||||
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
var jsonNode = objectMapper.readTree(new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "types.json").getInputStream());
|
||||
types = objectMapper.treeToValue(jsonNode.get("types"), objectMapper.getTypeFactory().constructType(
|
||||
new TypeReference<List<Type>>() {
|
||||
}));
|
||||
|
||||
types.forEach(t -> {
|
||||
t.setId(t.getType());
|
||||
t.setVersion(0L);
|
||||
});
|
||||
|
||||
when(dictionaryClient.getAllTypesForDossierTemplate(anyString(), anyBoolean())).thenReturn(types);
|
||||
when(dictionaryClient.getAllTypesForDossier(anyString(), anyBoolean())).thenReturn(new ArrayList<>());
|
||||
|
||||
|
||||
when(dictionaryClient.getColors(anyString())).thenReturn(objectMapper.readValue(new ClassPathResource(BASE_DIR + EFSA_SANITISATION_GFL_V1 + "colors.json").getInputStream(), Colors.class));
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
|
||||
String typeName = answer.getArgument(0);
|
||||
|
||||
var found = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst();
|
||||
if(found.isPresent()) {
|
||||
var type = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst().get();
|
||||
type.setEntries(getEntries(typeName, type.getTypeId()));
|
||||
|
||||
return type;
|
||||
}else{
|
||||
return null;
|
||||
}
|
||||
});
|
||||
|
||||
dictionaryService.updateDictionary("dossierTemplateId","dossierId");
|
||||
}
|
||||
|
||||
public void simulateIncrement(List<String> values, String deltaTypeName, long version) {
|
||||
when(dictionaryClient.getVersion(anyString())).thenReturn(version);
|
||||
when(dictionaryClient.getVersionForDossier(anyString())).thenReturn(version);
|
||||
|
||||
when(dictionaryClient.getDictionaryForType(anyString(), nullable(Long.class))).then(answer -> {
|
||||
String typeName = answer.getArgument(0);
|
||||
|
||||
var found = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst();
|
||||
if(found.isPresent()) {
|
||||
|
||||
var type = types.stream().filter(t -> t.getType().equalsIgnoreCase(typeName)).findFirst().get();
|
||||
|
||||
if (deltaTypeName.equalsIgnoreCase(typeName)) {
|
||||
type.setEntries(values.stream().map(l -> new DictionaryEntry(0, l, version, false, type.getTypeId())).collect(Collectors.toList()));
|
||||
} else {
|
||||
type.setEntries(new ArrayList<>());
|
||||
}
|
||||
|
||||
return type;
|
||||
}else{
|
||||
return null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUpdateDictionary() {
|
||||
dictionaryService.updateDictionary("dossierTemplateId", "dossierId");
|
||||
|
||||
var dict = dictionaryService.getDeepCopyDictionary("dossierTemplateId", "dossierId");
|
||||
assertThat(dict.getLocalAccessMap().size()).isEqualTo(12);
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private List<DictionaryEntry> getEntries(String typeName, String typeId) {
|
||||
Resource[] dictionaryResources = resourcePatternResolver.getResources("classpath:" + BASE_DIR + EFSA_SANITISATION_GFL_V1 + "dictionaries/**");
|
||||
for (var resource : dictionaryResources) {
|
||||
if (resource.getFilename().contains(typeName)) {
|
||||
|
||||
List<String> lines = IOUtils.readLines(resource.getInputStream());
|
||||
return lines.stream().map(l -> new DictionaryEntry(0, l, 0L, false, typeId)).collect(Collectors.toList());
|
||||
|
||||
}
|
||||
}
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -0,0 +1,12 @@
|
||||
{
|
||||
"defaultColor": "#9398a0",
|
||||
"requestAdd": "#04b093",
|
||||
"requestRemove": "#04b093",
|
||||
"notRedacted": "#c498fa",
|
||||
"analysisColor": "#dd4d50",
|
||||
"updatedColor": "#fdbd00",
|
||||
"dictionaryRequestColor": "#5b97db",
|
||||
"manualRedactionColor": "#9398a0",
|
||||
"previewColor": "#9398a0",
|
||||
"ignoredHintColor": "#e7d4ff"
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,21 @@
|
||||
5 Batch
|
||||
Author
|
||||
Batch Analysis
|
||||
Certificate of Analysis
|
||||
Chemical Investigator
|
||||
Contributors
|
||||
Five Batch
|
||||
GLP Certificate
|
||||
Pathologist
|
||||
Performing Laboratory
|
||||
Principle Investigator
|
||||
Quality Assurance
|
||||
References
|
||||
Report of Peer Review
|
||||
Study Director
|
||||
Study Monitor
|
||||
Test Facility
|
||||
Test Site
|
||||
Trial Site
|
||||
altitude
|
||||
latitude
|
||||
@ -0,0 +1,234 @@
|
||||
. Comparative Biochemistry and Physiology Part C: Toxicology & Pharmacology C Toxicology & Pharmacology
|
||||
. J. Exp. Zool
|
||||
. Lewis Publ., Boca Raton,
|
||||
1944; 82:377-390
|
||||
7th ed.; The Iowa State University Press: Ames, IA
|
||||
7th ed.; The Joint Nature ConservationIowa State University Press: Ames, IAReproduction, Fertility and Development 2
|
||||
ATLA
|
||||
Academic Press
|
||||
Advances in
|
||||
Ageing Dev.
|
||||
Agromedicine
|
||||
Am J Epidemiol
|
||||
Am J Ind Med
|
||||
Am J Med Genet A
|
||||
Am J Respir Crit Care Med
|
||||
American Society for Testing and Materials, Philadelphia, Pennsylvania.
|
||||
Anal Chem.
|
||||
Anal, Chem.,
|
||||
Animal Reproduction Science
|
||||
Annals of
|
||||
Annals of Maths Stats
|
||||
Apidologie
|
||||
Aq. Toxicol.
|
||||
Aquatic Ecology
|
||||
Aquatic Toxicology
|
||||
Aquatic toxicology
|
||||
Arch Environ Contam Toxicol
|
||||
Arch Toxicol
|
||||
Arch Toxicol.
|
||||
Archives of Environmental Contamination and Toxicology
|
||||
Archives of Toxicology
|
||||
Australasian Journal of Ecotoxicology
|
||||
BioFactors
|
||||
Biomedical chromatography
|
||||
Biometrics
|
||||
Biometrika
|
||||
Birth Defects Res. B. Dev. Reprod. Toxicol
|
||||
Birth Defects Res. B. Dev. Reprod. Toxicol.
|
||||
Br Med J
|
||||
British Health and Safety Executive, Chemicals Regulation Directorate Pesticides
|
||||
Brown Company Publishers, Dubuque, Iowa
|
||||
Bull Environ Contam Toxicol.
|
||||
Bull. Environ. Contam. Toxicol
|
||||
Bulletin
|
||||
Bulletin of Environmental Contamination and Toxicology
|
||||
CHEMOSPHERE
|
||||
Cahiers Agricultures
|
||||
Cancer Letters
|
||||
Cancer Res
|
||||
Carcinogenesis
|
||||
Chem. Res. Toxicol.
|
||||
Chem. Res. in Toxicol.
|
||||
Chemosphere
|
||||
Chinese Journal of Chromatography
|
||||
Clin Anat.
|
||||
Comparative Biochemistry and Physiology Part C: Toxicology & Pharmacology C Toxicology & Pharmacology
|
||||
Crit Rev Toxicol
|
||||
Critical Reviews in Toxicology
|
||||
Curr. Med. Chem.
|
||||
Current approaches in the statistical analysis of ecotoxicity data: guidance to application
|
||||
DEFRA
|
||||
Deutsche Lebensmittel-Rundschau
|
||||
Deutsche Lebensmittel-Rundschau.
|
||||
Dongbei Nongye Daxue Xuebao
|
||||
Dual-Gold
|
||||
EFSA Journal
|
||||
Ecological Research
|
||||
Ecotoxicol. Environ. Safety
|
||||
Ecotoxicology and Environmental Safety
|
||||
Endocrinology
|
||||
Env. Mol. Mutagen
|
||||
Environ Health Perspect
|
||||
Environ Health Perspect.
|
||||
Environ Health Perspect. 1
|
||||
Environ Mutat
|
||||
Environ Sci Pollut Res
|
||||
Environ Sci Technol
|
||||
Environ Toxicol.
|
||||
Environ and Molecular Mutagenesis
|
||||
Environ. Health Perspec.
|
||||
Environ. Health Perspect.
|
||||
Environ. Sci. Technol
|
||||
Environ. Sci. Technol.
|
||||
Environment International
|
||||
Environmental & Molecular Mutagenesis
|
||||
Environmental Health
|
||||
Environmental Health Perspectives
|
||||
Environmental Monitoring and Assessment
|
||||
Environmental Pollution
|
||||
Environmental Protection Agency Federal Register
|
||||
Environmental Science & Technology
|
||||
Environmental Science and Technology
|
||||
Environmental Science and Technology.
|
||||
Environmental Toxicology and Chemistry
|
||||
Environmental monitoring and assessment
|
||||
Environmental toxicology and chemistry
|
||||
Essays in Honor of Harold Hotelling
|
||||
Eur Respir
|
||||
Experimental Cel] Research
|
||||
FUNDAMENTAL AND APPLIED TOXICOLOGY
|
||||
Fischer, Jena
|
||||
Fish Sci
|
||||
Food Cosmet. Toxicol.
|
||||
Franckh, Stuttgar
|
||||
Franckh, Stuttgart
|
||||
Free Rad. Res
|
||||
Fundamentals of Applied Toxicology
|
||||
Fundamentals of Applied Toxicology1988
|
||||
Gen. Comp. Endocrinol
|
||||
Gen. Comp.Endocrinol
|
||||
Gen.Com. Endocrinol.
|
||||
General and Comparative Endocrinology
|
||||
Greenhouse Pest Management
|
||||
Gustav Fischer Verlag, Stuttgart
|
||||
Handbook of water purity and quality.
|
||||
Health and Safety Executive, Chemi-cals Regulation Directorate Pesticides
|
||||
Herpetologica
|
||||
High-Throughput Screening Methods in Toxicity Testing
|
||||
High-Throughput Screening Methods in Toxicity Testing. Hoboken, NJ: John Wiley & Sons
|
||||
ISBN
|
||||
ISSN
|
||||
ISSN/ISBN
|
||||
Ibis
|
||||
Int Arch Occup Environ Health
|
||||
Int J Cancer
|
||||
Irish Journal of Agricultural and Food Research
|
||||
Ist Annual Symposium on Aquatic Toxicology ASTM,
|
||||
J Econom
|
||||
J Endocrinol
|
||||
J LARGILLIERE
|
||||
J Radiat Res
|
||||
J Steroid Biochem Mol Biol
|
||||
J. Invest. Derm
|
||||
Joint Nature Conservation Committee
|
||||
Journal Pharmacology Experimental Therapeutics
|
||||
Journal of
|
||||
Journal of Animal Ecology
|
||||
Journal of Applied Ecology
|
||||
Journal of Applied Entomology
|
||||
Journal of Economic Entomology
|
||||
Journal of Environmental Science and Health
|
||||
Journal of Experimental Biology and Ecology
|
||||
Journal of Hazardous Materials
|
||||
Journal of Microbiology
|
||||
Journal of Toxicology and
|
||||
Journal of agricultural and food chemistry
|
||||
Journal of the American College of Toxicology
|
||||
Journal of the Royal Statistical Society,
|
||||
Leaflet
|
||||
Limnologica
|
||||
Marine Enironmental Research
|
||||
Marine Environmental Research
|
||||
Marine Pollution Bulletin
|
||||
Mass Spectrometry
|
||||
Medical Journal
|
||||
Middle Atlantic Reproduction and Teratology Association
|
||||
Mol Mutagen
|
||||
Mol. Cell. Endocrinol.
|
||||
Mutagenesis
|
||||
Mutat Res
|
||||
Mutation Research
|
||||
National Academy of Science,
|
||||
Nature
|
||||
Neurotoxicity research
|
||||
Nonparametric Statistics for the Behavioral Sciences
|
||||
OECD Guideline for the testing of chemicals
|
||||
OECD Guidelines for Testing of Chemicals
|
||||
OECD Guidelines for testing of chemicals.
|
||||
OECD Principles of Good Laboratory Practice and Compliance Monitoring
|
||||
OECD Series on Testing Assessment
|
||||
Occup Environ Med
|
||||
Occupational and Environmental Medicine
|
||||
Office of Prevention, Pesticides and Toxic Substances
|
||||
Official Journal of the European Communities
|
||||
Oxford University Press
|
||||
PLoS ONE
|
||||
Pediat. Res
|
||||
Pesticide Biochemistry and Physiology
|
||||
Physiol Chem
|
||||
Planta daninha
|
||||
Press
|
||||
Principles and Procedures of Statistics
|
||||
Principles and Procedures of Statistics, A Biometrical Approach
|
||||
Proc Natl Acad Sci USA
|
||||
Psychopharmacologia
|
||||
Publication
|
||||
Publications Office of the European Union, Luxembourg
|
||||
Published: Y
|
||||
Publisher Gustav Fischer, Stuttgart
|
||||
Raven press,
|
||||
Reg. Tox. Pharmacol
|
||||
Regul. Toxicol. Pharmacol
|
||||
Regulatory Toxicology and Pharmacology
|
||||
Reproductive BioMedicine Online
|
||||
Reproductive Toxicology
|
||||
Schweizerbart’sche Verlagsbuchhandlung
|
||||
Science of the Total Environment
|
||||
Scientific Reports
|
||||
Stain Technol
|
||||
Superorder Monogonnata
|
||||
Swiss Ordinance relating to Good Laboratory Practice
|
||||
Teratology
|
||||
The American Statistician
|
||||
The Chemical Society,
|
||||
The Inland Waters Volume
|
||||
The Royal Society for the Protection of Birds
|
||||
Tidepool Scientific Software, Mckinleyville, CA, USA
|
||||
Toxicol Chem
|
||||
Toxicol Pathol.
|
||||
Toxicol Sci
|
||||
Toxicol Sci.
|
||||
Toxicol Sci. 1
|
||||
Toxicol in Vitro
|
||||
Toxicol. Sci,
|
||||
Toxicol. Sci.
|
||||
Toxicol. in Vitro
|
||||
Toxicologic Pathology
|
||||
Toxicological Sciences
|
||||
Toxicological and Environmental Chemistry
|
||||
Toxicology Letters
|
||||
Toxicology and Applied Pharmacology
|
||||
Toxicology and Industrial Health
|
||||
Toxicology in Vitro
|
||||
Trends in Neurosciences
|
||||
Umweltbundesamt Development & Research Project
|
||||
WATER CHLORINATION:CHEM, ENVIRON. IMPACT HEALTH EFF., PROC. CONF
|
||||
Water Res.
|
||||
Workshop
|
||||
World Journal of Pharmaceutical Research
|
||||
XIII Symposium Pesticide Chemistry
|
||||
edition
|
||||
http://www.iobc-wprs.org
|
||||
ournal of the Royal Statistical Society
|
||||
paper was presented
|
||||
@ -0,0 +1,22 @@
|
||||
{
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"name": "EFSA sanitisation GFL v1",
|
||||
"description": "Redact all authors, laboratory addresses and other geolocation.",
|
||||
"dateAdded": "2021-01-21T14:25:35Z",
|
||||
"dateModified": "2021-07-19T08:09:27Z",
|
||||
"createdBy": "e3aed6ea-a9e5-4f3f-bde7-6f0fe0c4362c",
|
||||
"modifiedBy": "e3aed6ea-a9e5-4f3f-bde7-6f0fe0c4362c",
|
||||
"validFrom": null,
|
||||
"validTo": null,
|
||||
"downloadFileTypes": [
|
||||
"PREVIEW",
|
||||
"REDACTED",
|
||||
"DELTA_PREVIEW"
|
||||
],
|
||||
"reportTypes": [
|
||||
"EXCEL_MULTI_FILE",
|
||||
"WORD_SINGLE_FILE_APPENDIX_A1_TEMPLATE",
|
||||
"WORD_SINGLE_FILE_APPENDIX_A2_TEMPLATE"
|
||||
],
|
||||
"dossierTemplateStatus": "ACTIVE"
|
||||
}
|
||||
@ -0,0 +1,47 @@
|
||||
[
|
||||
{
|
||||
"name": "1.1 personal data (incl. geolocation); Article 39(e)(3)",
|
||||
"description": "(Regulations (EU) 2016/679 and (EU) 2018/1725 shall apply to the processing of personal data carried out pursuant to this Regulation. Any personal data made public pursuant to Article 38 of this Regulation and this Article shall only be used to ensure the transparency of the risk assessment under this Regulation and shall not be further processed in a manner that is incompatible with these purposes, in accordance with point (b) of Article 5(1) of Regulation (EU) 2016/679 and point (b) of Article 4(1) of Regulation (EU) 2018/1725, as the case may be)",
|
||||
"reason": "Article 39(e)(3) of Regulation (EC) No 178/2002"
|
||||
},
|
||||
{
|
||||
"name": "1.2 vertebrate study related personal data (incl. geolocation); Article 39(e)(2)",
|
||||
"description": "personal data (names and addresses) of individuals involved in testing on vertebrate studies or in obtaining toxicological information",
|
||||
"reason": "Article 39(e)(2) of Regulation (EC) No 178/2002"
|
||||
},
|
||||
{
|
||||
"name": "2. manufacturing or production process",
|
||||
"description": "the manufacturing or production process, including the method and innovative aspects thereof, as well as other technical and industrial specifications inherent to that process or method, except for information which is relevant to the assessment of safety",
|
||||
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
|
||||
},
|
||||
{
|
||||
"name": "3. links between a producer and applicant",
|
||||
"description": "commercial links between a producer or importer and the applicant or the authorisation holder, where applicable",
|
||||
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
|
||||
},
|
||||
{
|
||||
"name": "4. commercial information",
|
||||
"description": "commercial information revealing sourcing, market shares or business strategy of the applicant",
|
||||
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
|
||||
},
|
||||
{
|
||||
"name": "5. quantitative composition",
|
||||
"description": "quantitative composition of the subject matter of the request, except for information which is relevant to the assessment of safety",
|
||||
"reason": "Article 63(2)(a) of Regulation (EC) No 1107/2009 (making reference to Article 39 of Regulation EC No 178/2002)"
|
||||
},
|
||||
{
|
||||
"name": "6. specification of impurity",
|
||||
"description": "the specification of impurity of the active substance and the related methods of analysis for impurities in the active substance as manufactured, except for the impurities that are considered to be toxicologically, ecotoxicologically or environmentally relevant and the related methods of analysis for such impurities",
|
||||
"reason": "Article 63(2)(b) of Regulation (EC) No 1107/2009"
|
||||
},
|
||||
{
|
||||
"name": "7. results of production batches",
|
||||
"description": "results of production batches of the active substance including impurities",
|
||||
"reason": "Article 63(2)(c) of Regulation (EC) No 1107/2009"
|
||||
},
|
||||
{
|
||||
"name": "8. composition of a plant protection product",
|
||||
"description": "information on the complete composition of a plant protection product",
|
||||
"reason": "Article 63(2)(d) of Regulation (EC) No 1107/2009"
|
||||
}
|
||||
]
|
||||
@ -0,0 +1,480 @@
|
||||
package drools
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
|
||||
|
||||
global Section section
|
||||
|
||||
|
||||
// --------------------------------------- CBI rules -------------------------------------------------------------------
|
||||
// Fires for sections where aiMatchesType("CBI_author") holds and passes the
// type name "CBI_author" twice to section.addAiEntities.
// NOTE(review): presumably (AI entity type, target dictionary type) - confirm against Section.addAiEntities.
rule "0: Add CBI_author from ai"
|
||||
when
|
||||
Section(aiMatchesType("CBI_author"))
|
||||
then
|
||||
section.addAiEntities("CBI_author", "CBI_author");
|
||||
end
|
||||
|
||||
|
||||
// Fires for sections where aiMatchesType("ORG") holds and calls combineAiTypes with
// "ORG", the comma-separated list STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE and the
// type "CBI_address".
// NOTE(review): the rule name mentions CBI_author, but the action targets "CBI_address".
// NOTE(review): the meaning of the arguments 20, 3 and false is not visible here - confirm against Section.combineAiTypes.
rule "0: Combine ai types CBI_author from ai"
|
||||
when
|
||||
Section(aiMatchesType("ORG"))
|
||||
then
|
||||
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
|
||||
end
|
||||
|
||||
|
||||
// Non-vertebrate variant: applies when the file attribute "Vertebrate Study" is not "Yes"
// and the section matches type "CBI_author". Redacts CBI_author with the
// Article 39(e)(3) reason; the numeric argument 1 mirrors the rule number.
rule "1: Redact CBI Authors (Non vertebrate study)"
|
||||
when
|
||||
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
|
||||
then
|
||||
section.redact("CBI_author", 1, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate variant of rule 1: applies when the file attribute "Vertebrate Study" is "Yes"
// and the section matches type "CBI_author". Redacts CBI_author with the
// Article 39(e)(2) reason; the numeric argument 2 mirrors the rule number.
rule "2: Redact CBI Authors (Vertebrate study)"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_author"))
|
||||
then
|
||||
section.redact("CBI_author", 2, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Non-vertebrate variant: applies when the file attribute "Vertebrate Study" is not "Yes"
// and the section matches type "CBI_address". Calls redactNot for CBI_address (no legal
// reason is passed) and then ignoreRecommendations for the same type.
rule "3: Redact not CBI Address (Non vertebrate study)"
|
||||
when
|
||||
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
|
||||
then
|
||||
section.redactNot("CBI_address", 3, "Address found for non vertebrate study");
|
||||
section.ignoreRecommendations("CBI_address");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate variant: applies when the file attribute "Vertebrate Study" is "Yes" and the
// section matches type "CBI_address". Redacts CBI_address with the Article 39(e)(2) reason.
rule "4: Redact CBI Address (Vertebrate study)"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && matchesType("CBI_address"))
|
||||
then
|
||||
section.redact("CBI_address", 4, "Address found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Applies to every section that matches type "CBI_author" and calls
// expandToFalsePositiveByRegEx with a pattern of one apostrophe-like character followed by "s".
// The character class lists several apostrophe/quote variants; some are repeated, which is harmless.
// NOTE(review): the meaning of the trailing arguments (false, 0) is not visible here - confirm against Section.
rule "5: Do not redact genitive CBI_author"
|
||||
when
|
||||
Section(matchesType("CBI_author"))
|
||||
then
|
||||
section.expandToFalsePositiveByRegEx("CBI_author", "['’’'ʼˈ´`‘′ʻ’']s", false, 0);
|
||||
end
|
||||
|
||||
|
||||
// Non-vertebrate variant: applies when the file attribute "Vertebrate Study" is not "Yes",
// the section has a table header "Author(s)" and has no table header "Vertebrate study Y/N"
// (tables with that header are handled by rules 10 and 11). Calls redactCell on "Author(s)"
// for type CBI_author with the Article 39(e)(3) reason and the boolean argument false.
rule "6: Redact Author(s) cells in Tables with Author(s) header (Non vertebrate study)"
|
||||
when
|
||||
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
|
||||
then
|
||||
section.redactCell("Author(s)", 6, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate variant of rule 6: same table-header conditions, but requires the file
// attribute "Vertebrate Study" to be "Yes" and uses the Article 39(e)(2) reason.
rule "7: Redact Author(s) cells in Tables with Author(s) header (Vertebrate study)"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author(s)") && !hasTableHeader("Vertebrate study Y/N"))
|
||||
then
|
||||
section.redactCell("Author(s)", 7, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Same as rule 6 but for the singular table header "Author": non-vertebrate file,
// header "Author" present, header "Vertebrate study Y/N" absent; Article 39(e)(3) reason.
rule "8: Redact Author cells in Tables with Author header (Non vertebrate study)"
|
||||
when
|
||||
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
|
||||
then
|
||||
section.redactCell("Author", 8, "CBI_author", false, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate variant of rule 8: header "Author" present, header "Vertebrate study Y/N"
// absent, file attribute "Vertebrate Study" is "Yes"; Article 39(e)(2) reason.
rule "9: Redact Author cells in Tables with Author header (Vertebrate study)"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && hasTableHeader("Author") && !hasTableHeader("Vertebrate study Y/N"))
|
||||
then
|
||||
section.redactCell("Author", 9, "CBI_author", false, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Non-vertebrate variant: applies when the file attribute "Vertebrate Study" is not "Yes"
// and rowEquals holds for "Vertebrate study Y/N" with any of "Y", "Yes", "N" or "No".
// Calls redactCell on "Author(s)" with the boolean argument true (rules 6-9 pass false)
// and the Article 39(e)(3) reason.
// NOTE(review): presumably the boolean also adds the cell value as a recommendation, as the rule name suggests - confirm against Section.redactCell.
rule "10: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Non vertebrate study)"
|
||||
when
|
||||
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
|
||||
then
|
||||
section.redactCell("Author(s)", 10, "CBI_author", true, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate variant of rule 10: same rowEquals conditions on "Vertebrate study Y/N",
// but requires the file attribute "Vertebrate Study" to be "Yes" and uses the
// Article 39(e)(2) reason.
rule "11: Redact and recommand Authors in Tables with Vertebrate study Y/N header (Vertebrate study)"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && (rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes") || rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")))
|
||||
then
|
||||
section.redactCell("Author(s)", 11, "CBI_author", true, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Applies to sections whose searchText contains "BL" or "CT" (no vertebrate-study condition)
// and calls redactNotAndRecommendByRegEx for type "CBI_address" with a pattern that matches
// codes beginning with "BL", "CT"/"cT" plus a following character, or the standalone tokens CTL/CT1/CTi.
// NOTE(review): the rule name says "Redact", but the action called is redactNotAndRecommendByRegEx.
// NOTE(review): the guard only looks for upper-case "CT" while the pattern also accepts "cT";
// a section containing only the lower-case form never reaches the regex - confirm this is intended.
// NOTE(review): the meaning of the arguments (true, 0) is not visible here - confirm against Section.
rule "13: Redact addresses that start with BL or CTL"
|
||||
when
|
||||
Section(searchText.contains("BL") || searchText.contains("CT"))
|
||||
then
|
||||
section.redactNotAndRecommendByRegEx("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", true, 0, "CBI_address", 13, "Laboratory for vertebrate studies found");
|
||||
end
|
||||
|
||||
|
||||
// Non-vertebrate variant: applies when the file attribute "Vertebrate Study" is not "Yes"
// and searchText contains "et al". The pattern captures, in group 1, a capitalised word
// (optionally followed by one or two initials) that directly precedes " et al"; the call
// passes 1 after the boolean, type "CBI_author" and the Article 39(e)(3) reason.
// NOTE(review): presumably the argument 1 selects capture group 1 - confirm against Section.redactAndRecommendByRegEx.
rule "14: Redact and add recommendation for et al. author (Non vertebrate study)"
|
||||
when
|
||||
Section(!fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
|
||||
then
|
||||
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 14, "Author found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate variant of rule 14: identical "et al" pattern and arguments, but requires the
// file attribute "Vertebrate Study" to be "Yes" and uses the Article 39(e)(2) reason.
rule "15: Redact and add recommendation for et al. author (Vertebrate study)"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("et al"))
|
||||
then
|
||||
section.redactAndRecommendByRegEx("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", false, 1, "CBI_author", 15, "Author found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate-only: applies when the file attribute "Vertebrate Study" is "Yes" and
// searchText contains both "Species:" and "Source:" (with colon). Calls
// recommendLineAfter("Source:", ...) for type "CBI_address".
rule "16: Add recommendation for Addresses in Test Organism sections"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species:") && searchText.contains("Source:"))
|
||||
then
|
||||
section.recommendLineAfter("Source:", "CBI_address");
|
||||
end
|
||||
|
||||
|
||||
// Vertebrate-only: applies when the file attribute "Vertebrate Study" is "Yes" and
// searchText contains both "Species" and "Source" (without colon). Calls
// recommendLineAfter("Source", ...) for type "CBI_address".
// NOTE(review): any text containing "Species:" and "Source:" also contains the colon-less
// forms, so sections that satisfy rule 16 satisfy this rule as well - confirm the overlap is intended.
rule "17: Add recommendation for Addresses in Test Animals sections"
|
||||
when
|
||||
Section(fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes") && searchText.contains("Species") && searchText.contains("Source"))
|
||||
then
|
||||
section.recommendLineAfter("Source", "CBI_address");
|
||||
end
|
||||
|
||||
|
||||
// Applies to every section that matches type "published_information" (no vertebrate-study
// condition). Calls redactNotAndReference twice, once for "CBI_author" and once for
// "CBI_address", each referencing "published_information".
rule "18: Do not redact Names and Addresses if Published Information found"
|
||||
when
|
||||
Section(matchesType("published_information"))
|
||||
then
|
||||
section.redactNotAndReference("CBI_author","published_information", 18, "Published Information found");
|
||||
section.redactNotAndReference("CBI_address","published_information", 18, "Published Information found");
|
||||
end
|
||||
|
||||
|
||||
// --------------------------------------- PII rules -------------------------------------------------------------------
|
||||
|
||||
|
||||
// Rule 19: redacts entries of type "PII" in sections of documents whose
// "Vertebrate Study" attribute is not "Yes"; legal basis Article 39(e)(3).
rule "19: Redacted PII Personal Identification Information (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("PII")
        )
    then
        section.redact("PII", 19, "PII (Personal Identification Information) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 20: redacts entries of type "PII" in sections of vertebrate studies;
// legal basis Article 39(e)(2).
rule "20: Redacted PII Personal Identification Information (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesType("PII")
        )
    then
        section.redact("PII", 20, "PII (Personal Identification Information) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 21: non-vertebrate studies; the cheap "@" containment test gates the
// e-mail regular expression applied in the consequence.
rule "21: Redact Emails by RegEx (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("@")
        )
    then
        // Pattern: local part, "@", domain, and a final label of 2-24
        // characters that ends in a letter. Matches are reported as PII.
        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 21, "PII (Personal Identification Information) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 22: vertebrate-study counterpart of the e-mail rule; gated by the
// presence of "@" in the search text.
rule "22: Redact Emails by RegEx (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("@")
        )
    then
        // Same e-mail pattern as the non-vertebrate variant; legal basis
        // Article 39(e)(2).
        section.redactByRegEx("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", true, 1, "PII", 22, "PII (Personal Identification Information) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 23: non-vertebrate studies; fires when the section contains a "+"
// followed by at least two digits, then applies the full phone-number pattern.
rule "23: Redact telephone numbers by RegEx (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && containsRegEx("[+]\\d{2,}", true)
        )
    then
        // Every alternative of the pattern starts with "+" and a 2-3 digit
        // prefix; an optional "-" plus 1-3 digits may follow at the end.
        section.redactByRegEx("((([+]\\d{2,3} (\\d{7,12})\\b)|([+]\\d{2,3}(\\d{3,12})\\b|[+]\\d{2,3}([ -]\\(?\\d{2,6}\\)?){2,4})|[+]\\d{2,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", true, 1, "PII", 23, "PII (Personal Identification Information) found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 24: vertebrate-study counterpart of the phone-number rule; gated by a
// "+" followed by at least two digits.
rule "24: Redact telephone numbers by RegEx (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && containsRegEx("[+]\\d{2,}", true)
        )
    then
        // Same phone-number pattern as the non-vertebrate variant; legal basis
        // Article 39(e)(2).
        section.redactByRegEx("((([+]\\d{2,3} (\\d{7,12})\\b)|([+]\\d{2,3}(\\d{3,12})\\b|[+]\\d{2,3}([ -]\\(?\\d{2,6}\\)?){2,4})|[+]\\d{2,3} ?((\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b)", true, 1, "PII", 24, "PII (Personal Identification Information) found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 25: independent of the "Vertebrate Study" attribute. Fires when the
// section contains a run of three or more digits, or a "+" followed by two or
// more digits, and adds PII recommendations for phone-like numbers.
rule "25: Recommend telephone numbers by RegEx"
    when
        Section(
            containsRegEx("\\d{3,}", true)
            || containsRegEx("[+]\\d{2,}", true)
        )
    then
        // Unlike the redaction pattern, this one also accepts numbers without
        // a leading "+" prefix (digit groups separated by spaces or hyphens).
        section.addRecommendationByRegEx("([+]\\d{2,3} (\\d{7,12})\\b|((([+]\\d{2,3}(\\d{3,12})\\b|[+]\\d{2,3}([ -]\\(?\\d{2,6}\\)?){2,4})|(\\(?\\b(\\d{2,6}\\)?)([ -]\\d{2,6}){1,4}))(-\\d{1,3})?\\b))", true, 1, "PII");
end
|
||||
|
||||
|
||||
// Rule 26: non-vertebrate studies. Fires when the section text contains any of
// the contact labels listed below and then redacts, as PII, the line following
// each label (plus two label-to-label ranges). Legal basis Article 39(e)(3).
rule "26: Redact contact information (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (
                text.contains("Contact point:")
                || text.contains("Phone:")
                || text.contains("Fax:")
                || text.contains("Tel.:")
                || text.contains("Tel:")
                || text.contains("E-mail:")
                || text.contains("Email:")
                || text.contains("e-mail:")
                || text.contains("E-mail address:")
                // "Contact:" is redacted in the consequence, so it must also be
                // able to trigger the rule on its own; none of the other labels
                // contain it as a substring (they use "Contact point:" or a
                // lower-case "contact:").
                || text.contains("Contact:")
                || text.contains("Alternative contact:")
                || text.contains("Telephone number:")
                || text.contains("Telephone No:")
                || text.contains("Fax number:")
                || text.contains("Telephone:")
                || text.contains("Phone No.")
                || text.contains("European contact:")
            )
        )
    then
        section.redactLineAfter("Contact point:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        // Range redactions: text between a start label and an end label.
        section.redactBetween("No:", "Fax", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 26, true, "Contact information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 27: vertebrate studies. Fires when the section text contains any of the
// contact labels listed below and then redacts, as PII, the line following
// each label (plus two label-to-label ranges). Legal basis Article 39(e)(2).
rule "27: Redact contact information (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (
                text.contains("Contact point:")
                || text.contains("Phone:")
                || text.contains("Fax:")
                || text.contains("Tel.:")
                || text.contains("Tel:")
                || text.contains("E-mail:")
                || text.contains("Email:")
                || text.contains("e-mail:")
                || text.contains("E-mail address:")
                // "Contact:" is redacted in the consequence, so it must also be
                // able to trigger the rule on its own; none of the other labels
                // contain it as a substring (they use "Contact point:" or a
                // lower-case "contact:").
                || text.contains("Contact:")
                || text.contains("Alternative contact:")
                || text.contains("Telephone number:")
                || text.contains("Telephone No:")
                || text.contains("Fax number:")
                || text.contains("Telephone:")
                || text.contains("Phone No.")
                || text.contains("European contact:")
            )
        )
    then
        section.redactLineAfter("Contact point:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        // Range redactions: text between a start label and an end label.
        section.redactBetween("No:", "Fax", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 27, true, "Contact information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 28: non-vertebrate studies. Fires on applicant-style sections: headline
// containing "applicant", "Primary contact" or "Alternative contact", or text
// containing "Applicant" or "Telephone number:". The consequence redacts the
// line after each contact label as PII; legal basis Article 39(e)(3).
rule "28: Redact contact information if applicant is found (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (
                headlineContainsWord("applicant")
                || text.contains("Applicant")
                || headlineContainsWord("Primary contact")
                || headlineContainsWord("Alternative contact")
                || text.contains("Telephone number:")
            )
        )
    then
        section.redactLineAfter("Contact point:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        // Range redactions: text between a start label and an end label.
        section.redactBetween("No:", "Fax", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 28, true, "Applicant information was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 29: vertebrate studies. Fires on applicant-style sections: headline
// containing "applicant", "Primary contact" or "Alternative contact", or text
// containing "Applicant" or "Telephone number:". The consequence redacts the
// line after each contact label as PII; legal basis Article 39(e)(2).
rule "29: Redact contact information if applicant is found (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (
                headlineContainsWord("applicant")
                || text.contains("Applicant")
                || headlineContainsWord("Primary contact")
                || headlineContainsWord("Alternative contact")
                || text.contains("Telephone number:")
            )
        )
    then
        section.redactLineAfter("Contact point:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel.:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Email:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("e-mail:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail address:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Contact:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Alternative contact:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone No:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        // Range redactions: text between a start label and an end label.
        section.redactBetween("No:", "Fax", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("Contact:", "Tel.:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("European contact:", "PII", 29, true, "Applicant information was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 30: non-vertebrate studies. Fires when the section text names a
// producer or manufacturer (two phrases are compared on the lower-cased text,
// three are case-sensitive) and redacts the line after each contact label as
// PII; legal basis Article 39(e)(3).
rule "30: Redact contact information if Producer is found (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (
                text.toLowerCase().contains("producer of the plant protection")
                || text.toLowerCase().contains("producer of the active substance")
                || text.contains("Manufacturer of the active substance")
                || text.contains("Manufacturer:")
                || text.contains("Producer or producers of the active substance")
            )
        )
    then
        // "Contact:" was previously redacted twice with identical arguments;
        // the redundant second call has been removed.
        section.redactLineAfter("Contact:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 30, true, "Producer was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 31: vertebrate studies. Fires when the section text names a producer or
// manufacturer (two phrases are compared on the lower-cased text, three are
// case-sensitive) and redacts the line after each contact label as PII; legal
// basis Article 39(e)(2).
rule "31: Redact contact information if Producer is found (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && (
                text.toLowerCase().contains("producer of the plant protection")
                || text.toLowerCase().contains("producer of the active substance")
                || text.contains("Manufacturer of the active substance")
                || text.contains("Manufacturer:")
                || text.contains("Producer or producers of the active substance")
            )
        )
    then
        // "Contact:" was previously redacted twice with identical arguments;
        // the redundant second call has been removed.
        section.redactLineAfter("Contact:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("E-mail:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Fax number:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Telephone number:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Tel:", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactLineAfter("Phone No.", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
        section.redactBetween("No:", "Fax", "PII", 31, true, "Producer was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 32: non-vertebrate studies. Handles the "AUTHOR(S):" ... "COMPLETION
// DATE:" layout; sections that use "STUDY COMPLETION DATE:" are excluded here
// by the last constraint.
rule "32: Redact AUTHOR(S) (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("COMPLETION DATE:")
            && !searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 32, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 33: vertebrate studies. Handles the "AUTHOR(S):" ... "COMPLETION DATE:"
// layout; sections that use "STUDY COMPLETION DATE:" are excluded here by the
// last constraint.
rule "33: Redact AUTHOR(S) (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("COMPLETION DATE:")
            && !searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 33, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 34: non-vertebrate studies. Handles the "AUTHOR(S):" ... "STUDY
// COMPLETION DATE:" layout.
rule "34: Redact AUTHOR(S) (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 34, true, "AUTHOR(S) was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 35: vertebrate studies. Handles the "AUTHOR(S):" ... "STUDY COMPLETION
// DATE:" layout.
rule "35: Redact AUTHOR(S) (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("AUTHOR(S):")
            && searchText.contains("STUDY COMPLETION DATE:")
        )
    then
        section.redactLinesBetween("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", 35, true, "AUTHOR(S) was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 36: non-vertebrate studies whose search text contains
// "PERFORMING LABORATORY:".
rule "36: Redact PERFORMING LABORATORY (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("PERFORMING LABORATORY:")
        )
    then
        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 36, true, "PERFORMING LABORATORY was found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
        // NOTE(review): redactNot targets the same CBI_address type that was
        // just redacted above; the vertebrate counterpart has no such call.
        // Confirm the intended interplay of the two calls.
        section.redactNot("CBI_address", 36, "Performing laboratory found for non vertebrate study");
end
|
||||
|
||||
|
||||
// Rule 37: vertebrate studies whose search text contains
// "PERFORMING LABORATORY:"; the text up to "LABORATORY PROJECT ID:" is
// redacted as CBI_address with legal basis Article 39(e)(2).
rule "37: Redact PERFORMING LABORATORY (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && searchText.contains("PERFORMING LABORATORY:")
        )
    then
        section.redactBetween("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", 37, true, "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// --------------------------------------- other rules -------------------------------------------------------------------
|
||||
|
||||
// Rule 50: fires when the lower-cased search text contains "purity" and adds
// "hint_only" annotations for purity statements that end in a percentage.
rule "50: Purity Hint"
    when
        Section(
            searchText.toLowerCase().contains("purity")
        )
    then
        // Pattern: "purity", optionally followed by " of" or a short
        // parenthesised remark and a colon, then a number and a "%" sign.
        section.addHintAnnotationByRegEx("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", true, 1, "hint_only");
end
|
||||
|
||||
|
||||
// Rule 51: ignores "dossier_redaction" matches.
// Note that the first constraint is negated: the rule fires when the
// "Confidentiality" file attribute is NOT "confidential" (ignoring case),
// which reads opposite to the wording of the rule name.
rule "51: Ignore dossier_redaction entries if confidential"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Confidentiality","confidential")
            && matchesType("dossier_redaction")
        );
    then
        section.ignore("dossier_redaction");
end
|
||||
|
||||
|
||||
// Rule 52: non-vertebrate studies; redacts images of type "signature" with
// legal basis Article 39(e)(3).
rule "52: Redact signatures (Non vertebrate study)"
    when
        Section(
            !fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesImageType("signature")
        )
    then
        section.redactImage("signature", 52, "Signature found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 53: vertebrate studies; redacts images of type "signature" with legal
// basis Article 39(e)(2).
rule "53: Redact signatures (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesImageType("signature")
        )
    then
        section.redactImage("signature", 53, "Signature found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
|
||||
|
||||
// Rule 54: vertebrate studies; redacts images of type "logo" with legal basis
// Article 39(e)(2).
rule "54: Redact Logos (Vertebrate study)"
    when
        Section(
            fileAttributeByLabelEqualsIgnoreCase("Vertebrate Study","Yes")
            && matchesImageType("logo")
        )
    then
        section.redactImage("logo", 54, "Logo found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
end
|
||||
@ -0,0 +1,185 @@
|
||||
{
|
||||
"types": [
|
||||
{
|
||||
"type": "CBI_address",
|
||||
"hexColor": "#9398a0",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 140,
|
||||
"hint": false,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": false,
|
||||
"recommendation": false,
|
||||
"description": "All site names and addresses, and location (e.g. Syngenta, Monthey, GPS Co-ordinates, Mr Smith of … providing the…). Except addresses in published literature and the applicant address.",
|
||||
"addToDictionaryAction": true,
|
||||
"label": "CBI Address",
|
||||
"hasDictionary": true,
|
||||
"systemManaged": false
|
||||
},
|
||||
{
|
||||
"type": "CBI_author",
|
||||
"hexColor": "#9398a0",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 130,
|
||||
"hint": false,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": false,
|
||||
"recommendation": false,
|
||||
"description": "All authors named in the study documentation. Except names in published literature.",
|
||||
"addToDictionaryAction": true,
|
||||
"label": "CBI Author",
|
||||
"hasDictionary": true,
|
||||
"systemManaged": false
|
||||
},
|
||||
{
|
||||
"type": "PII",
|
||||
"hexColor": "#9398a0",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 150,
|
||||
"hint": false,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": false,
|
||||
"recommendation": false,
|
||||
"description": "Not authors but listed in the document: Names, signatures, telephone, email etc.; e.g. Reg Manager, QA Manager",
|
||||
"addToDictionaryAction": true,
|
||||
"label": "PII",
|
||||
"hasDictionary": true,
|
||||
"systemManaged": false
|
||||
},
|
||||
{
|
||||
"type": "formula",
|
||||
"hexColor": "#036ffc",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 1002,
|
||||
"hint": true,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": true,
|
||||
"recommendation": false,
|
||||
"description": "Empty dictionary used to configure formula colors.",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Formula",
|
||||
"hasDictionary": false,
|
||||
"systemManaged": true
|
||||
},
|
||||
{
|
||||
"type": "hint_only",
|
||||
"hexColor": "#fa98f7",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 50,
|
||||
"hint": true,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": true,
|
||||
"recommendation": false,
|
||||
"description": "Entries of this dictionary will be highlighted only",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Hint Only",
|
||||
"hasDictionary": true,
|
||||
"systemManaged": false
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"hexColor": "#bdd6ff",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 999,
|
||||
"hint": true,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": true,
|
||||
"recommendation": false,
|
||||
"description": "Empty dictionary used to configure image colors.",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Image",
|
||||
"hasDictionary": false,
|
||||
"systemManaged": true
|
||||
},
|
||||
{
|
||||
"type": "logo",
|
||||
"hexColor": "#9398a0",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 1001,
|
||||
"hint": false,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": true,
|
||||
"recommendation": false,
|
||||
"description": "Empty dictionary used to configure logo colors.",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Logo",
|
||||
"hasDictionary": false,
|
||||
"systemManaged": true
|
||||
},
|
||||
{
|
||||
"type": "must_redact",
|
||||
"hexColor": "#9398a0",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 100,
|
||||
"hint": false,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": true,
|
||||
"recommendation": false,
|
||||
"description": "Entries of this dictionary get redacted wherever found.",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Must Redact",
|
||||
"hasDictionary": true,
|
||||
"systemManaged": false
|
||||
},
|
||||
{
|
||||
"type": "ocr",
|
||||
"hexColor": "#bdd6ff",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 1000,
|
||||
"hint": true,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": true,
|
||||
"recommendation": false,
|
||||
"description": "Empty dictionary used to configure ocr colors.",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Ocr",
|
||||
"hasDictionary": false,
|
||||
"systemManaged": true
|
||||
},
|
||||
{
|
||||
"type": "published_information",
|
||||
"hexColor": "#85ebff",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 70,
|
||||
"hint": true,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": false,
|
||||
"recommendation": false,
|
||||
"description": "Manually managed list of public journals and papers that need no redaction",
|
||||
"addToDictionaryAction": true,
|
||||
"label": "Published Information",
|
||||
"hasDictionary": true,
|
||||
"systemManaged": false
|
||||
},
|
||||
{
|
||||
"type": "signature",
|
||||
"hexColor": "#9398a0",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 1003,
|
||||
"hint": false,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": true,
|
||||
"recommendation": false,
|
||||
"description": "Empty dictionary used to configure signature colors.",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Signature",
|
||||
"hasDictionary": false,
|
||||
"systemManaged": true
|
||||
},
|
||||
{
|
||||
"type": "imported_redaction",
|
||||
"hexColor": "#f0f0c0",
|
||||
"recommendationHexColor": "#8df06c",
|
||||
"rank": 9999,
|
||||
"hint": false,
|
||||
"dossierTemplateId": "b959cf8a-e8f8-470b-aad4-6fc602ef110b",
|
||||
"caseInsensitive": false,
|
||||
"recommendation": false,
|
||||
"description": "Redaction Annotations that were imported from documents",
|
||||
"addToDictionaryAction": false,
|
||||
"label": "Imported Redaction",
|
||||
"hasDictionary": false,
|
||||
"systemManaged": true,
|
||||
"autoHideSkipped": true
|
||||
}
|
||||
]
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user