RED-2836: Enabled false positives per dictionary

This commit is contained in:
deiflaender 2022-03-21 12:47:14 +01:00
parent 17b885d901
commit 88e6ac3d22
21 changed files with 223 additions and 416 deletions

View File

@ -27,6 +27,7 @@ public class RedactionLogEntry {
private boolean redacted;
private boolean isHint;
private boolean isRecommendation;
private boolean isFalsePositive;
private String section;
private float[] color;

View File

@ -22,4 +22,6 @@ public class RedactionRequest {
private ManualRedactions manualRedactions;
@Builder.Default
private Set<Integer> excludedPages = new HashSet<>();
private boolean includeFalsePositives;
}

View File

@ -198,7 +198,11 @@ public class RedactionController implements RedactionResource {
}
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
return redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages());
var merged = redactionLogMergeService.mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages());
merged.getRedactionLogEntry().removeIf(e -> e.isFalsePositive() && !redactionRequest.isIncludeFalsePositives());
return merged;
}

View File

@ -1,22 +1,16 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import static java.util.stream.Collectors.toSet;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import lombok.Data;
import lombok.Getter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import lombok.Data;
import lombok.Getter;
@Data
public class Dictionary {
public static final String RECOMMENDATION_PREFIX = "recommendation_";
@Getter
private List<DictionaryModel> dictionaryModels;
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
@ -26,6 +20,7 @@ public class Dictionary {
public Dictionary(List<DictionaryModel> dictionaryModels, DictionaryVersion version) {
this.dictionaryModels = dictionaryModels;
this.dictionaryModels.forEach(dm -> localAccessMap.put(dm.getType(), dm));
this.version = version;
@ -33,6 +28,7 @@ public class Dictionary {
public int getDictionaryRank(String type) {
if (!localAccessMap.containsKey(type)) {
return 0;
}
@ -40,16 +36,6 @@ public class Dictionary {
}
/**
 * Whether entities of the given dictionary type are flagged as recommendations.
 * Unknown types (no model registered under {@code type}) yield {@code false}.
 */
public boolean isRecommendation(String type) {
    DictionaryModel model = localAccessMap.get(type);
    return model != null && model.isRecommendation();
}
public boolean hasLocalEntries() {
return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
@ -62,16 +48,18 @@ public class Dictionary {
}
/** Returns the dictionary model registered for {@code type}, or {@code null} when the type is unknown. */
public DictionaryModel getType(String type) {
return localAccessMap.get(type);
}
/**
 * Whether {@code value} is present in the dictionary registered for {@code type},
 * or in its recommendation counterpart ({@code recommendation_<type>}), looking at
 * both the non-local and local value sets of each.
 */
public boolean containsValue(String type, String value) {
    return modelHasValue(type, value) || modelHasValue(RECOMMENDATION_PREFIX + type, value);
}

/** True when a model is registered under {@code key} and either its non-local (false) or local (true) value set contains {@code value}. */
private boolean modelHasValue(String key, String value) {
    DictionaryModel model = localAccessMap.get(key);
    if (model == null) {
        return false;
    }
    return model.getValues(false).contains(value) || model.getValues(true).contains(value);
}
@ -85,6 +73,7 @@ public class Dictionary {
return false;
}
public boolean isCaseInsensitiveDictionary(String type) {
DictionaryModel dictionaryModel = localAccessMap.get(type);

View File

@ -0,0 +1,26 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.HashSet;
import java.util.Set;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Groups the three entry categories loaded for one dictionary type: regular
 * entries, false positives and false recommendations. Each set defaults to an
 * empty {@link HashSet} so callers never see {@code null}.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class DictionaryEntries {
/** Regular dictionary entries for the type. */
@Builder.Default
Set<DictionaryEntry> entries = new HashSet<>();
/** Entries that mark known false-positive matches for the type. */
@Builder.Default
Set<DictionaryEntry> falsePositives = new HashSet<>();
/** Entries that mark known false recommendations for the type. */
@Builder.Default
Set<DictionaryEntry> falseRecommendations = new HashSet<>();
}

View File

@ -19,8 +19,9 @@ public class DictionaryModel implements Serializable {
private float[] color;
private boolean caseInsensitive;
private boolean hint;
private boolean recommendation;
private Set<DictionaryEntry> entries;
private Set<DictionaryEntry> falsePositives;
private Set<DictionaryEntry> falseRecommendations;
private Set<String> localEntries;
private boolean isDossierDictionary;
@ -29,4 +30,14 @@ public class DictionaryModel implements Serializable {
.toSet());
}
/** Values of all false-positive entries that are not marked deleted. */
public Set<String> getFalsePositiveValues() {
    return activeValues(falsePositives);
}

/** Values of all false-recommendation entries that are not marked deleted. */
public Set<String> getFalseRecommendationValues() {
    return activeValues(falseRecommendations);
}

/** Extracts the values of the non-deleted entries in {@code source}. */
private static Set<String> activeValues(Set<DictionaryEntry> source) {
    return source.stream()
            .filter(entry -> !entry.isDeleted())
            .map(DictionaryEntry::getValue)
            .collect(Collectors.toSet());
}
}

View File

@ -48,11 +48,13 @@ public class Entity implements ReasonHolder {
private Set<Entity> references = new HashSet<>();
private EntityType entityType;
public Entity(String word, String type, boolean redaction, String redactionReason,
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines, Set<Entity> references) {
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines, Set<Entity> references, EntityType entityType) {
this.word = word;
this.type = type;
@ -71,11 +73,12 @@ public class Entity implements ReasonHolder {
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines = engines;
this.references = references;
this.entityType = entityType;
}
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) {
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine, EntityType entityType) {
this.word = word;
this.type = type;
@ -86,6 +89,7 @@ public class Entity implements ReasonHolder {
this.isDictionaryEntry = isDictionaryEntry;
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines.add(engine);
this.entityType = entityType;
}

View File

@ -0,0 +1,5 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
/**
 * Category attached to each found entity.
 * ENTITY: a regular dictionary/rule match. RECOMMENDATION: a suggested match
 * (e.g. NER results are created with this type). FALSE_POSITIVE /
 * FALSE_RECOMMENDATION: matches of a type's false-positive or
 * false-recommendation entry sets, which suppress overlapping regular matches
 * and can be filtered from the redaction log on request.
 */
public enum EntityType {
ENTITY, RECOMMENDATION, FALSE_POSITIVE, FALSE_RECOMMENDATION
}

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
@ -74,7 +73,7 @@ public class Section {
Set<Entity> entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet());
Set<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toSet());
Set<Entity> found = EntitySearchUtils.find(searchText, values, asType, headline, sectionNumber, false, false, Engine.NER, true);
Set<Entity> found = EntitySearchUtils.findEntities(searchText, values, dictionary.getType(asType), headline, sectionNumber, false, false, Engine.NER, true, true);
EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
Set<Entity> finalResult = new HashSet<>();
@ -119,7 +118,7 @@ public class Section {
} else if (!allowDuplicateTypes && foundParts.contains(entity.getType())) {
if (numberOfMatchParts >= minPartMatches) {
String value = searchText.substring(start, lastEnd);
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER, true));
}
start = -1;
lastEnd = -1;
@ -134,7 +133,7 @@ public class Section {
} else if (entity.getType().equals(startType) && start != -1) {
if (numberOfMatchParts >= minPartMatches) {
String value = searchText.substring(start, lastEnd);
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER, true));
}
start = entity.getStart();
lastEnd = entity.getEnd();
@ -151,7 +150,7 @@ public class Section {
if (numberOfMatchParts >= minPartMatches) {
String value = searchText.substring(start, lastEnd);
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER));
found.addAll(findEntities(value, asType, false, true, 0, null, null, Engine.NER, true));
}
if (!found.isEmpty()) {
@ -291,7 +290,7 @@ public class Section {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match)) {
Set<Entity> expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE);
Set<Entity> expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE, false);
expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities));
}
}
@ -450,7 +449,7 @@ public class Section {
while (matcher.find()) {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match) && match.length() >= 3) {
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + asType, (x) -> new HashSet<>()).add(match);
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(match);
}
}
}
@ -460,12 +459,11 @@ public class Section {
public void redactNotAndReference(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.REFERENCE_TYPE) String referenceType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
Set<Entity> references = entities.stream().filter(entity -> entity.getType().equals(referenceType)).collect(Collectors.toSet());
entities.forEach(entity -> {
if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType().equals(RECOMMENDATION_PREFIX + type)) {
if (entity.getType().equals(type)) {
entity.setRedaction(false);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -494,21 +492,26 @@ public class Section {
public void addRedaction(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason, @Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
Set<Entity> found = findEntities(value.trim(), asType, true, true, ruleNumber, reason, legalBasis, Engine.RULE);
Set<Entity> found = findEntities(value.trim(), asType, true, true, ruleNumber, reason, legalBasis, Engine.RULE, false);
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
}
public void ignore(String type) {
entities.removeIf(entity -> entity.getType().equals(type));
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.ENTITY));
}
/**
 * Drops every recommendation entity of the given dictionary type from this
 * section; regular entities of the same type are kept.
 */
public void ignoreRecommendations(String type) {
    // == instead of equals(): idiomatic for enum constants and cannot throw NPE
    // if an Entity was constructed without an entityType.
    entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType() == EntityType.RECOMMENDATION);
}
@ThenAction
public void expandToHintAnnotationByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.STRING) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType) {
public void expandToFalsePositiveByRegEx(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.STRING) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive, @Argument(ArgumentType.INTEGER) int group) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -524,13 +527,14 @@ public class Section {
while (matcher.find()) {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match)) {
expanded.addAll(findEntities(entity.getWord() + match, asType, false, false, 0, null, null, Engine.RULE));
expanded.addAll(findEntities(entity.getWord() + match, type, false, false, 0, null, null, Engine.RULE, false));
}
}
}
EntitySearchUtils.addEntitiesWithHigherRank(entities, expanded, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
expanded.forEach(e -> e.setEntityType(EntityType.FALSE_POSITIVE));
}
@ -545,7 +549,7 @@ public class Section {
while (matcher.find()) {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match)) {
Set<Entity> found = findEntities(match.trim(), asType, false, false, 0, null, null, Engine.RULE);
Set<Entity> found = findEntities(match.trim(), asType, false, false, 0, null, null, Engine.RULE, false);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
}
}
@ -555,7 +559,7 @@ public class Section {
@ThenAction
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType) {
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE);
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE, false);
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
}
@ -577,7 +581,7 @@ public class Section {
}
if (StringUtils.isNotBlank(cleanValue) && cleanValue.length() >= 3) {
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + asType, (x) -> new HashSet<>()).add(cleanValue);
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(cleanValue);
}
}
}
@ -599,20 +603,20 @@ public class Section {
while (matcher.find()) {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match) && match.length() >= 3) {
Set<Entity> found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE);
Set<Entity> found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + asType, (x) -> new HashSet<>()).add(match);
localDictionaryAdds.computeIfAbsent(asType, (x) -> new HashSet<>()).add(match);
}
}
}
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted, int ruleNumber, String reason, String legalBasis, Engine engine) {
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted, int ruleNumber, String reason, String legalBasis, Engine engine, boolean asRecommendation) {
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
String searchValue = caseInsensitive ? value.toLowerCase() : value;
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, engine, false);
Set<Entity> found = EntitySearchUtils.findEntities(text, Set.of(searchValue), dictionary.getType(asType), headline, sectionNumber, false, false, engine, false, asRecommendation);
found.forEach(entity -> {
if (redacted) {
@ -629,10 +633,8 @@ public class Section {
private void redact(String type, int ruleNumber, String reason, String legalBasis, boolean redaction) {
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
entities.forEach(entity -> {
if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType().equals(RECOMMENDATION_PREFIX + type)) {
if (entity.getType().equals(type)) {
entity.setRedaction(redaction);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -665,7 +667,7 @@ public class Section {
} else {
String word = value.toString();
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE);
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY);
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -692,9 +694,9 @@ public class Section {
while (matcher.find()) {
String match = matcher.group().trim();
if (match.length() >= 3) {
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>()).add(match);
localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(match);
String lastname = match.split(" ")[0];
localDictionaryAdds.computeIfAbsent(RECOMMENDATION_PREFIX + type, (x) -> new HashSet<>()).add(lastname);
localDictionaryAdds.computeIfAbsent(type, (x) -> new HashSet<>()).add(lastname);
}
}
}
@ -709,7 +711,7 @@ public class Section {
if (values != null) {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE);
Set<Entity> found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {
@ -730,7 +732,7 @@ public class Section {
while (matcher.find()) {
String match = matcher.group(group);
if (StringUtils.isNotBlank(match)) {
Set<Entity> found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE);
Set<Entity> found = findEntities(match.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
}
}
@ -745,7 +747,7 @@ public class Section {
for (String value : values) {
if (StringUtils.isNotBlank(value)) {
Set<Entity> found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE);
Set<Entity> found = findEntities(value.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {
@ -772,7 +774,7 @@ public class Section {
return;
}
Set<Entity> found = findEntities(line.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE);
Set<Entity> found = findEntities(line.trim(), asType, false, redaction, ruleNumber, reason, legalBasis, Engine.RULE, false);
EntitySearchUtils.addEntitiesWithHigherRank(entities, found, dictionary);
if (redactEverywhere && !isLocal()) {

View File

@ -99,18 +99,33 @@ public class DictionaryService {
}
Set<DictionaryEntry> entries = new HashSet<>();
var newEntries = convertEntries(t.getId(), currentVersion);
var newValues = newEntries.stream().map(v -> v.getValue()).collect(Collectors.toSet());
Set<DictionaryEntry> falsePositives = new HashSet<>();
Set<DictionaryEntry> falseRecommendations = new HashSet<>();
DictionaryEntries newEntries = getEntries(t.getId(), currentVersion);
var newValues = newEntries.getEntries().stream().map(v -> v.getValue()).collect(Collectors.toSet());
var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(v -> v.getValue()).collect(Collectors.toSet());
var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(v -> v.getValue()).collect(Collectors.toSet());
// add old entries from existing DictionaryModel
oldModel.ifPresent(dictionaryModel -> entries.addAll(dictionaryModel.getEntries().stream().filter(
f -> !newValues.contains(f.getValue())).collect(Collectors.toList())
));
oldModel.ifPresent(dictionaryModel -> falsePositives.addAll(dictionaryModel.getFalsePositives().stream().filter(
f -> !newFalsePositivesValues.contains(f.getValue())).collect(Collectors.toList())
));
oldModel.ifPresent(dictionaryModel -> falseRecommendations.addAll(dictionaryModel.getFalseRecommendations().stream().filter(
f -> !newFalseRecommendationsValues.contains(f.getValue())).collect(Collectors.toList())
));
// Add Increments
entries.addAll(convertEntries(t.getId(), currentVersion));
entries.addAll(newEntries.getEntries());
falsePositives.addAll(newEntries.getFalsePositives());
falseRecommendations.addAll(newEntries.getFalseRecommendations());
return new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t
.isHint(), t.isRecommendation(), entries, new HashSet<>(), dossierId != null);
.isHint(), entries, falsePositives, falseRecommendations, new HashSet<>(), dossierId != null);
})
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList());
@ -140,17 +155,20 @@ public class DictionaryService {
}
private Set<DictionaryEntry> convertEntries(String typeId, Long fromVersion) {
private DictionaryEntries getEntries(String typeId, Long fromVersion) {
var type = dictionaryClient.getDictionaryForType(typeId, fromVersion);
Set<DictionaryEntry> entries = new HashSet<>(type
.getEntries());
Set<DictionaryEntry> entries = type.getEntries() != null ? new HashSet<>(type.getEntries()) : new HashSet<>();
Set<DictionaryEntry> falsePositives = type.getFalsePositiveEntries() != null ? new HashSet<>(type.getFalsePositiveEntries()) : new HashSet<>();
Set<DictionaryEntry> falseRecommendations = type.getFalseRecommendationEntries() != null ? new HashSet<>(type.getFalseRecommendationEntries()) : new HashSet<>();
if (type.isCaseInsensitive()) {
entries.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
falsePositives.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
}
return entries;
return new DictionaryEntries(entries, falsePositives, falseRecommendations);
}
@ -191,16 +209,6 @@ public class DictionaryService {
}
/**
 * Whether the given dictionary type is flagged as a recommendation within the
 * dossier template's dictionary. Unknown types yield {@code false}.
 */
public boolean isRecommendation(String type, String dossierTemplateId) {
    // NOTE(review): assumes dossierTemplateId is always present in the map — a
    // missing template would NPE on getLocalAccessMap(); confirm against callers.
    DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId)
            .getLocalAccessMap()
            .get(type);
    return model != null && model.isRecommendation();
}
public Dictionary getDeepCopyDictionary(String dossierTemplateId, String dossierId) {
List<DictionaryModel> copy = new ArrayList<>();

View File

@ -25,6 +25,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel
import com.iqser.red.service.redaction.v1.server.redaction.model.Entities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
@ -177,7 +178,7 @@ public class EntityRedactionService {
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines(), entity
.getReferences()));
.getReferences(), entity.getEntityType()));
}
}
return entitiesPerPage;
@ -199,13 +200,6 @@ public class EntityRedactionService {
private void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) {
analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.isRecommendation(key)) {
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (!dictionary.containsValue(key, value)) {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
}
});
} else {
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (dictionary.getLocalAccessMap().get(key) == null) {
@ -218,7 +212,6 @@ public class EntityRedactionService {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
});
}
});
}
@ -236,11 +229,9 @@ public class EntityRedactionService {
String lowercaseInputString = searchableString.toLowerCase();
for (DictionaryModel model : dictionary.getDictionaryModels()) {
if (model.isCaseInsensitive()) {
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false));
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(lowercaseInputString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false));
} else {
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false));
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.findEntities(searchableString, model.getValues(local), model, headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, false, local ? true : false));
}
}
@ -261,7 +252,7 @@ public class EntityRedactionService {
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getResult().containsKey(sectionNumber)) {
nerEntities.getResult().get(sectionNumber).forEach(res -> {
if (cellStarts == null || cellStarts.isEmpty()) {
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
} else {
boolean intersectsCellStart = false;
for (Integer cellStart : cellStarts) {
@ -271,7 +262,7 @@ public class EntityRedactionService {
}
}
if (!intersectsCellStart) {
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER));
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
}
}
});

View File

@ -19,6 +19,7 @@ import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -90,7 +91,7 @@ public class ManualRedactionSurroundingTextService {
private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
List<Rectangle> toFindPositions) {
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false);
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), Set.of(value), "dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, false, EntityType.ENTITY);
Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null);
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);

View File

@ -17,6 +17,7 @@ import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
@ -203,7 +204,8 @@ public class RedactionLogCreatorService {
.type(entity.getType())
.redacted(entity.isRedaction())
.isHint(isHint(entity.getType(), dossierTemplateId))
.isRecommendation(isRecommendation(entity.getType(), dossierTemplateId))
.isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(entity.getHeadline())
.sectionNumber(entity.getSectionNumber())
.matchedRule(entity.getMatchedRule())
@ -233,10 +235,4 @@ public class RedactionLogCreatorService {
return dictionaryService.isHint(type, dossierTemplateId);
}
private boolean isRecommendation(String type, String dossierTemplateId) {
return dictionaryService.isRecommendation(type, dossierTemplateId);
}
}

View File

@ -15,8 +15,10 @@ import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
@ -55,9 +57,27 @@ public class EntitySearchUtils {
}
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
Engine engine, boolean ignoreMinLength) {
/**
 * Finds all occurrences of {@code values} in {@code inputString} for the given
 * dictionary model, then overlays the model's suppression entries: matches of
 * the false-positive (or, in recommendation mode, false-recommendation) value
 * set replace any shorter regular matches they contain and are added to the
 * result marked with the corresponding {@link EntityType}.
 *
 * @param asRecommendation when true, regular matches are typed RECOMMENDATION
 *                         and the false-recommendation set is used for
 *                         suppression; otherwise ENTITY / false positives.
 */
public Set<Entity> findEntities(String inputString, Set<String> values, DictionaryModel type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
        Engine engine, boolean ignoreMinLength, boolean asRecommendation) {
    Set<Entity> found = find(inputString, values, type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength,
            asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY);
    // Both modes run the identical find/suppress/merge sequence; only the value
    // set and the marker type differ, so factor them instead of duplicating it.
    Set<String> suppressionValues = asRecommendation ? type.getFalseRecommendationValues() : type.getFalsePositiveValues();
    EntityType suppressionType = asRecommendation ? EntityType.FALSE_RECOMMENDATION : EntityType.FALSE_POSITIVE;
    Set<Entity> suppressions = find(inputString, suppressionValues, type.getType(), headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, ignoreMinLength, suppressionType);
    removeFalsePositives(found, suppressions);
    found.addAll(suppressions);
    return found;
}
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionary,
Engine engine, boolean ignoreMinLength, EntityType entityType) {
Set<Entity> found = new HashSet<>();
for (String value : values) {
@ -75,7 +95,7 @@ public class EntitySearchUtils {
stopIndex = startIndex + cleanValue.length();
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine));
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine, entityType));
}
} while (startIndex > -1);
}
@ -83,6 +103,7 @@ public class EntitySearchUtils {
}
private boolean isSeparator(char c) {
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '' || c == '';
@ -119,6 +140,25 @@ public class EntitySearchUtils {
}
/**
 * Removes from {@code entities} every entity that lies completely inside one of the given
 * false-positive matches (same section, strictly shorter word), then removes the
 * false-positive matches themselves. Both sets are modified in place.
 *
 * @param entities       entities to clean up; shrunk in place
 * @param falsePositives false-positive matches to subtract from {@code entities}
 */
public void removeFalsePositives(Set<Entity> entities, Set<Entity> falsePositives) {
	List<Entity> containedInFalsePositive = new ArrayList<>();
	for (Entity falsePositive : falsePositives) {
		for (Entity candidate : entities) {
			boolean strictlyShorter = candidate.getWord().length() < falsePositive.getWord().length();
			boolean covered = candidate.getStart() >= falsePositive.getStart() && candidate.getEnd() <= falsePositive.getEnd();
			boolean sameSection = falsePositive.getSectionNumber() == candidate.getSectionNumber();
			if (strictlyShorter && covered && candidate != falsePositive && sameSection) {
				containedInFalsePositive.add(candidate);
			}
		}
	}
	entities.removeAll(containedInFalsePositive);
	entities.removeAll(falsePositives);
}
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
List<Entity> wordsToRemove = new ArrayList<>();
@ -126,8 +166,7 @@ public class EntitySearchUtils {
for (Entity inner : entities) {
if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
// FIXME this is workaround for RED-3327 and should be removed in the future.
if (word.getType().contains("recommendation_") && !inner.getType().contains("recommendation_")) {
if (word.getEntityType().equals(EntityType.RECOMMENDATION) && word.getEntityType().equals(EntityType.ENTITY)) {
wordsToRemove.add(word);
} else {
wordsToRemove.add(inner);
@ -135,7 +174,7 @@ public class EntitySearchUtils {
}
}
}
wordsToRemove.forEach(entities::remove);
wordsToRemove.stream().filter(e -> !e.getEntityType().equals(EntityType.FALSE_POSITIVE) && !e.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)).forEach(entities::remove);
}

View File

@ -86,12 +86,6 @@ public class RedactionIntegrationTest {
private static final String OCR = "ocr";
private static final String DOSSIER_REDACTIONS = "dossier_redactions";
private static final String IMPORTED_REDACTION = "imported_redaction";
private static final String RECOMMENDATION_AUTHOR = "recommendation_CBI_author";
private static final String RECOMMENDATION_ADDRESS = "recommendation_CBI_address";
private static final String FALSE_POSITIVE = "false_positive";
private static final String PII = "PII";
@Autowired
@ -129,6 +123,8 @@ public class RedactionIntegrationTest {
private final Map<String, List<String>> dictionary = new HashMap<>();
private final Map<String, List<String>> dossierDictionary = new HashMap<>();
private final Map<String, List<String>> falsePositive = new HashMap<>();
private final Map<String, List<String>> falseRecommendation = new HashMap<>();
private final Map<String, String> typeColorMap = new HashMap<>();
private final Map<String, Boolean> hintTypeMap = new HashMap<>();
private final Map<String, Boolean> caseInSensitiveMap = new HashMap<>();
@ -226,9 +222,6 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
when(dictionaryClient.getDictionaryForType(PURITY + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR + ":" + TEST_DOSSIER_TEMPLATE_ID, version)).thenReturn(getDictionaryResponse(OCR, false));
@ -492,8 +485,6 @@ public class RedactionIntegrationTest {
deleted.add("David Chubb");
deleted.add("mouse");
dictionary.get(FALSE_POSITIVE).add("David Chubb");
reanlysisVersions.put("David Chubb", 3L);
reanlysisVersions.put("mouse", 3L);
@ -501,7 +492,6 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, null)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
start = System.currentTimeMillis();
@ -665,7 +655,6 @@ public class RedactionIntegrationTest {
deleted.add("David Chubb");
deleted.add("mouse");
dictionary.get(FALSE_POSITIVE).add("David Chubb");
reanlysisVersions.put("David Chubb", 3L);
reanlysisVersions.put("mouse", 3L);
@ -674,7 +663,6 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(VERTEBRATE, null)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, null)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
start = System.currentTimeMillis();
@ -1263,21 +1251,6 @@ public class RedactionIntegrationTest {
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(RECOMMENDATION_AUTHOR, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/recommendation_CBI_author.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(RECOMMENDATION_ADDRESS, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/recommendation_CBI_address.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(FALSE_POSITIVE, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/false_positive.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dictionary.computeIfAbsent(PURITY, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/purity.txt")
.stream()
@ -1314,6 +1287,13 @@ public class RedactionIntegrationTest {
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
dossierDictionary.put(IMPORTED_REDACTION, new ArrayList<>());
falsePositive.computeIfAbsent(PII, v -> new ArrayList<>())
.addAll(ResourceLoader.load("dictionaries/PII_false_positive.txt")
.stream()
.map(this::cleanDictionaryEntry)
.collect(Collectors.toSet()));
}
@ -1367,9 +1347,6 @@ public class RedactionIntegrationTest {
typeColorMap.put(PUBLISHED_INFORMATION, "#85ebff");
typeColorMap.put(TEST_METHOD, "#91fae8");
typeColorMap.put(PII, "#66ccff");
typeColorMap.put(RECOMMENDATION_AUTHOR, "#8df06c");
typeColorMap.put(RECOMMENDATION_ADDRESS, "#8df06c");
typeColorMap.put(FALSE_POSITIVE, "#ffffff");
typeColorMap.put(PURITY, "#ffe187");
typeColorMap.put(IMAGE, "#fcc5fb");
typeColorMap.put(OCR, "#fcc5fb");
@ -1389,9 +1366,6 @@ public class RedactionIntegrationTest {
hintTypeMap.put(PUBLISHED_INFORMATION, true);
hintTypeMap.put(TEST_METHOD, true);
hintTypeMap.put(PII, false);
hintTypeMap.put(RECOMMENDATION_AUTHOR, false);
hintTypeMap.put(RECOMMENDATION_ADDRESS, false);
hintTypeMap.put(FALSE_POSITIVE, true);
hintTypeMap.put(PURITY, false);
hintTypeMap.put(IMAGE, true);
hintTypeMap.put(OCR, true);
@ -1412,9 +1386,6 @@ public class RedactionIntegrationTest {
caseInSensitiveMap.put(PUBLISHED_INFORMATION, true);
caseInSensitiveMap.put(TEST_METHOD, false);
caseInSensitiveMap.put(PII, false);
caseInSensitiveMap.put(RECOMMENDATION_AUTHOR, false);
caseInSensitiveMap.put(RECOMMENDATION_ADDRESS, false);
caseInSensitiveMap.put(FALSE_POSITIVE, false);
caseInSensitiveMap.put(PURITY, false);
caseInSensitiveMap.put(IMAGE, true);
caseInSensitiveMap.put(OCR, true);
@ -1435,9 +1406,6 @@ public class RedactionIntegrationTest {
recommendationTypeMap.put(PUBLISHED_INFORMATION, false);
recommendationTypeMap.put(TEST_METHOD, false);
recommendationTypeMap.put(PII, false);
recommendationTypeMap.put(RECOMMENDATION_AUTHOR, true);
recommendationTypeMap.put(RECOMMENDATION_ADDRESS, true);
recommendationTypeMap.put(FALSE_POSITIVE, false);
recommendationTypeMap.put(PURITY, false);
recommendationTypeMap.put(IMAGE, false);
recommendationTypeMap.put(OCR, false);
@ -1447,7 +1415,6 @@ public class RedactionIntegrationTest {
recommendationTypeMap.put(DOSSIER_REDACTIONS, false);
recommendationTypeMap.put(IMPORTED_REDACTION, false);
rankTypeMap.put(FALSE_POSITIVE, 160);
rankTypeMap.put(PURITY, 155);
rankTypeMap.put(PII, 150);
rankTypeMap.put(ADDRESS, 140);
@ -1460,8 +1427,6 @@ public class RedactionIntegrationTest {
rankTypeMap.put(PUBLISHED_INFORMATION, 70);
rankTypeMap.put(TEST_METHOD, 60);
rankTypeMap.put(HINT_ONLY, 50);
rankTypeMap.put(RECOMMENDATION_AUTHOR, 40);
rankTypeMap.put(RECOMMENDATION_ADDRESS, 30);
rankTypeMap.put(IMAGE, 30);
rankTypeMap.put(OCR, 29);
rankTypeMap.put(LOGO, 28);
@ -1511,6 +1476,8 @@ public class RedactionIntegrationTest {
.id(type + ":" + TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColorMap.get(type))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.falsePositiveEntries(falsePositive.containsKey(type) ? toDictionaryEntry(falsePositive.get(type)) : new ArrayList<>())
.falseRecommendationEntries(falseRecommendation.containsKey(type) ? toDictionaryEntry(falseRecommendation.get(type)) : new ArrayList<>())
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))

View File

@ -9,6 +9,7 @@ import org.junit.Test;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
public class EntitySearchUtilsTest {
@ -16,8 +17,8 @@ public class EntitySearchUtilsTest {
public void testNestedEntitiesRemoval() {
Set<Entity> entities = new HashSet<>();
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE);
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
entities.add(nested);
entities.add(nesting);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
@ -39,14 +40,14 @@ public class EntitySearchUtilsTest {
// Arrange
Set<Entity> existingEntities = new HashSet<>();
Entity existingEntity1 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity1 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
existingEntities.add(existingEntity1);
existingEntities.add(existingEntity2);
Set<Entity> foundEntities = new HashSet<>();
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities2 = new Entity("Superman Y.", "fake type", 10, 20, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity foundEntities2 = new Entity("Superman Y.", "fake type", 10, 20, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
foundEntities.add(foundEntities1);
foundEntities.add(foundEntities2);
@ -72,14 +73,14 @@ public class EntitySearchUtilsTest {
// Arrange
Set<Entity> existingEntities = new HashSet<>();
Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
existingEntities.add(existingEntity1);
existingEntities.add(existingEntity2);
Set<Entity> foundEntities = new HashSet<>();
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities2 = new Entity("X. Superman Y.", "fake type", 7, 20, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity foundEntities2 = new Entity("X. Superman Y.", "fake type", 7, 20, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
foundEntities.add(foundEntities1);
foundEntities.add(foundEntities2);
@ -104,14 +105,14 @@ public class EntitySearchUtilsTest {
// Arrange
Set<Entity> existingEntities = new HashSet<>();
Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE,EntityType.ENTITY);
Entity existingEntity2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
existingEntities.add(existingEntity1);
existingEntities.add(existingEntity2);
Set<Entity> foundEntities = new HashSet<>();
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities2 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity foundEntities2 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
foundEntities.add(foundEntities1);
foundEntities.add(foundEntities2);
@ -136,14 +137,14 @@ public class EntitySearchUtilsTest {
// Arrange
Set<Entity> existingEntities = new HashSet<>();
Entity existingEntity1 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity1 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
existingEntities.add(existingEntity1);
existingEntities.add(existingEntity2);
Set<Entity> foundEntities = new HashSet<>();
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities1 = new Entity("Batman X.", "fake type", 0, 8, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
foundEntities.add(foundEntities1);
foundEntities.add(foundEntities2);
@ -166,14 +167,14 @@ public class EntitySearchUtilsTest {
// Arrange
Set<Entity> existingEntities = new HashSet<>();
Entity existingEntity1 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE);
Entity existingEntity1 = new Entity("X. Superman", "fake type", 7, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity existingEntity2 = new Entity("Batman", "fake type", 0, 5, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
existingEntities.add(existingEntity1);
existingEntities.add(existingEntity2);
Set<Entity> foundEntities = new HashSet<>();
Entity foundEntities1 = new Entity("Batman X. Superman", "fake type", 0, 17, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE);
Entity foundEntities1 = new Entity("Batman X. Superman", "fake type", 0, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
Entity foundEntities2 = new Entity("Superman", "fake type", 10, 17, "fake headline", 0, false, false, Engine.RULE, EntityType.ENTITY);
foundEntities.add(foundEntities1);
foundEntities.add(foundEntities2);

View File

@ -1,239 +0,0 @@
(Parent
ADME - Bioanalyses, Vergèze,
Aeration: Gentle
Andrews mean
Andrews pKi
BASF study code
Billington
Bond elute
Brown Liquid
Brown Orange
Brown Solid
Brown disco
Brown discoloured
Brown fat
Brown liquid
Brown solid
Buehler (1980),
Buehler (1980).
Buehler 9
Buehler Test.
Buehler assay
Buehler test
Buehler)
Buehler-like :
Calandra lark
Cary, NC,
Chambersas for
Class 4
Class III
Class III)
Class method,
Class2 (effectively
ClassMethod) Vischim
Clay =
Cochran-Armitage Test.
Cochran-armitage test.
Dunnett 2
Dunnett's "t''
Dunnett's Test
Dunnett's multiple
Dunnett's t-test
Dunnett's test
Dunnett) **
Dunnett) Dose
Dunnett) Food
Dunnett) Lower
Dunnett, 1964)
Dunnetts Multiple
Dunnetts T
Dunnetts Test)
Dunnetts adjustment
Dunnetts multiple
Dunnetts post
Dunnetts t-test)
Fine vacuolation
Fine white
Finewhite powder
Fischer 344
Fischer 344)
Fischer F344
Fischer rat
Fischer) Contaminants:
Fischer-344 acceptability
Fischer-344/
Fischer-344/DuCrj acceptability
Fischer344 rats.
Fischer344) :
Fischerrat embryo
Fischers exact
Fisher 344
Fisher Exact Test
Fisher Exact test
Fisher exact test
Fisher's Exact
Fisher-344 rat
Fisher344 (F344/
Fisher344 rat
Fisher344 rats
Fisher344 rats.
Fisher344 rats/
Fisher344) group
Fishers exact.
Freeman-Tukey transform
Gentle aeration
Grade 2:
Grade 3)
Grade 3:
Green
Green Alga
Green as
Green-, blue-,
GreenAlga, Selenastrum
GreenAlga.. Springborn
Gross External
Gross Necropsy
Gross and
Gross necropsies
Gross necropsy
Gross pathology
Gross pathology:
Hall Farm
Hall Farm)
HallFarm (396
Ham's F12
Hams F10
Hand-held
HarlanWinkelmann, Borchem,
Hill International,
Hill Top
Hill criteria
Hill, Bracknell,
HillTop Chamber,
Hillmodel was
Hills model
Japanese White
Jerneplaque assay
Kent. Blood
Klimisch score
Litchfield and Wilcoxon
Long term
Long-Term Toxicity
Long-term
Longbw/
Major Macroscopic findings
Major biliary
Major defects
Major deviations
Major fetal
Major metabolic
Major metabolites
Major route
Major treatment-related
Mallard (Anas
Mallard with
Manual-Hand held
Marsh Frog
Marsh harrier
Masonjars (500
Meier formula)
Meier survival
Miles anti-Canine
Miles anti-dog
Netherlands, 2011
Netherlands, published:
Netherlands. Report
Penn State
Porewater:
Rattray study
Read across
Reed Bunting
Reichert-Jung Cryo
RifconGmbH. Oxon
Ritz and
S.P.-U/
San Diego,
Shaw's Farm,
Sprague
Strain JapaneseWhite
Strong morphological
Weight -
Weight 2744
Weight Gain
Weight On
Weight Range
Weight and
Weight and length
Weight at
Weight gain
Weight loss
Weight of
Weight only
Weight range
Weight, age
Weight, feed
Weight-of-evidence approach
Weight:
Weight; Adult
Weight; Whole
WeightGain 0-1
WeightRange:
Weightat dosing:
Weightrange 200-250g.
White (Tif:
White 10
White 3
White Age
White Powder
White Strain
White blood
White cell
White crystalline
White guideline
White guinea-pigs
White hygroscopic
White powder
White solid
White stork
White swan
White to off-white
White wagtail
White, solid
WhitePowder Lot/
Whiteguideline :
Whitehygroscopic solid
Whitepowder Lot/
Whitepowder Stability
Whitesolid Batch
Whiteswan)
Whiteto off-white
Wildlife International specifications
Wildlife Internationalspecifications Water:
Williams E
Williams medium
Williams' medium
WilliamsE medium
Williams test
Wilson's method.
Wilson. All foetal
Wilson.All foetal
Wood pigeons
York.
Zeneca dossier);
Zenecadossier); see
green algae
CTL
No details reported
Not given
Not indicated
Not reported
Not reported.
Not stated
Syngenta
Syngenta AG
N/A
No details reported
Not available
Test facility
TBD
David Chubb

View File

@ -9,10 +9,9 @@ global Section section
//rule "0: Expand CBI Authors with firstname initials"
// when
// Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
// Section(matchesType("CBI_author"))
// then
// section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1);
// end
@ -20,22 +19,21 @@ rule "0: Add CBI_author from ai"
when
Section(aiMatchesType("CBI_author"))
then
section.addAiEntities("CBI_author", "recommendation_CBI_author");
section.addAiEntities("CBI_author", "CBI_author");
end
rule "0: Combine ai types CBI_author from ai"
when
Section(aiMatchesType("ORG"))
then
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "recommendation_CBI_address", 3, false);
section.combineAiTypes("ORG", "STREET,POSTAL,COUNTRY,CARDINAL,CITY,STATE", 20, "CBI_address", 3, false);
end
rule "0: Expand CBI Authors with firstname initials"
when
Section(matchesType("CBI_author") || matchesType("recommendation_CBI_author"))
Section(matchesType("CBI_author"))
then
section.expandByRegEx("CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "[^\\s]+");
section.expandByRegEx("recommendation_CBI_author", "(,? [A-Z]\\.?( ?[A-Z]\\.?)?( ?[A-Z]\\.?)?\\b\\.?)", false, 1, "[^\\s]+");
end