Merge branch 'AZURE_NER_FP' into 'master'
RED-9918: Azure entity recognition (Spike) See merge request redactmanager/redaction-service!496
This commit is contained in:
commit
9d668f9be1
@ -4,7 +4,7 @@ plugins {
|
||||
}
|
||||
|
||||
description = "redaction-service-api-v1"
|
||||
val persistenceServiceVersion = "2.530.0"
|
||||
val persistenceServiceVersion = "2.531.0"
|
||||
|
||||
dependencies {
|
||||
implementation("org.springframework:spring-web:6.0.12")
|
||||
|
||||
@ -16,7 +16,7 @@ val layoutParserVersion = "0.141.0"
|
||||
val jacksonVersion = "2.15.2"
|
||||
val droolsVersion = "9.44.0.Final"
|
||||
val pdfBoxVersion = "3.0.0"
|
||||
val persistenceServiceVersion = "2.530.0"
|
||||
val persistenceServiceVersion = "2.531.0"
|
||||
val springBootStarterVersion = "3.1.5"
|
||||
val springCloudVersion = "4.0.4"
|
||||
val testContainersVersion = "1.19.7"
|
||||
|
||||
@ -22,6 +22,8 @@ public class RedactionServiceSettings {
|
||||
|
||||
private boolean nerServiceEnabled = true;
|
||||
|
||||
private boolean azureNerServiceEnabled;
|
||||
|
||||
private boolean priorityMode;
|
||||
|
||||
private long dictionaryCacheMaximumSize = 100;
|
||||
|
||||
@ -15,5 +15,6 @@ public class EntityRecognitionEntity {
|
||||
private int startOffset;
|
||||
private int endOffset;
|
||||
private String type;
|
||||
private Double confidence;
|
||||
|
||||
}
|
||||
|
||||
@ -11,19 +11,15 @@ import lombok.RequiredArgsConstructor;
|
||||
* This class provides logging functionality specifically for rules execution
|
||||
* in a Drools context. It is designed to log messages with different log levels
|
||||
* (INFO, WARN, ERROR) and formats messages using a placeholder-based approach
|
||||
* similar to popular logging frameworks like SLF4J.
|
||||
* similar to popular logging frameworks like SLF4J. <p>
|
||||
*
|
||||
* <p>
|
||||
* Log messages can include placeholders (i.e., `{}`), which will be replaced by
|
||||
* the corresponding arguments when the message is formatted.
|
||||
* </p>
|
||||
* <p>
|
||||
* the corresponding arguments when the message is formatted. <p>
|
||||
*
|
||||
* Example usage:
|
||||
* <pre>
|
||||
* logger.info("Message with placeholder {}", object);
|
||||
* </pre>
|
||||
* </p>
|
||||
*/
|
||||
@RequiredArgsConstructor
|
||||
public class RulesLogger {
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
@ -9,6 +10,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
@ -17,7 +19,7 @@ import lombok.experimental.FieldDefaults;
|
||||
*/
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class NerEntities {
|
||||
|
||||
List<NerEntity> nerEntityList;
|
||||
@ -29,6 +31,14 @@ public class NerEntities {
|
||||
}
|
||||
|
||||
|
||||
public void merge(NerEntities other) {
|
||||
|
||||
List<NerEntity> mergedList = new ArrayList<>(nerEntityList);
|
||||
mergedList.addAll(other.getNerEntityList());
|
||||
nerEntityList = mergedList;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if there are any entities of a specified type.
|
||||
*
|
||||
@ -55,11 +65,16 @@ public class NerEntities {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Represents a single NER entity with its value, text range, and type.
|
||||
*/
|
||||
public record NerEntity(String value, TextRange textRange, String type) {
|
||||
public record NerEntity(String value, TextRange textRange, String type, Double confidence) {
|
||||
|
||||
public NerEntity(String value, TextRange textRange, String type) {
|
||||
this(value, textRange, type, null);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -336,6 +336,11 @@ public class AnalysisPreparationService {
|
||||
} else {
|
||||
nerEntities = new NerEntities(Collections.emptyList());
|
||||
}
|
||||
if (redactionServiceSettings.isAzureNerServiceEnabled()) {
|
||||
NerEntitiesModel azureNerEntitiesModel = redactionStorageService.getAzureNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
NerEntities azureNerEntities = NerEntitiesAdapter.toNerEntities(azureNerEntitiesModel, document);
|
||||
nerEntities.merge(azureNerEntities);
|
||||
}
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
@ -391,6 +396,12 @@ public class AnalysisPreparationService {
|
||||
} else {
|
||||
nerEntities = new NerEntities(Collections.emptyList());
|
||||
}
|
||||
if (redactionServiceSettings.isAzureNerServiceEnabled()) {
|
||||
NerEntitiesModel azureNerEntitiesModel = redactionStorageService.getAzureNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
|
||||
azureNerEntitiesModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, azureNerEntitiesModel);
|
||||
NerEntities azureNerEntities = NerEntitiesAdapter.toNerEntities(azureNerEntitiesModel, document);
|
||||
nerEntities.merge(azureNerEntities);
|
||||
}
|
||||
return nerEntities;
|
||||
}
|
||||
|
||||
|
||||
@ -628,7 +628,9 @@ public class EntityCreationService {
|
||||
.map(bBox -> Pair.of(bBox.getMaxY(), bBox.getMinY()))
|
||||
.map(maxMinPair -> tableNode.streamRow(tableCell.getRow())
|
||||
.filter(nextTableCell -> nextTableCell.getCol() > tableCell.getCol())
|
||||
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
|
||||
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(),
|
||||
maxMinPair.getRight(),
|
||||
nextTableCell.getTextBlock()))
|
||||
.map(b -> b.trim(tableNode.getTextBlock()))
|
||||
.filter(boundary -> isValidEntityTextRange(tableNode.getTextBlock(), boundary))
|
||||
.map(boundary -> byTextRange(boundary, type, entityType, tableNode))
|
||||
@ -1223,6 +1225,49 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Optionally creates a text entity based on a Named Entity Recognition (NER) entity
|
||||
* if the confidence of the entity lies above the given minimal confidence.
|
||||
*
|
||||
* @param nerEntity The NER entity used for creating the text entity.
|
||||
* @param minConfidence The minimal confidence required
|
||||
* @param entityType The entity's classification.
|
||||
* @param semanticNode The semantic node related to the NER entity.
|
||||
* @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created.
|
||||
*/
|
||||
public Optional<TextEntity> optionalByNerEntityWithConfidence(NerEntities.NerEntity nerEntity, Double minConfidence, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
if (nerEntity.confidence() != null && nerEntity.confidence() < minConfidence) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Set.of(Engine.NER));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Optionally creates a text entity based on a Named Entity Recognition (NER) entity, with a specified type
|
||||
* if the confidence of the entity lies above the given minimal confidence.
|
||||
*
|
||||
* @param nerEntity The NER entity used for creating the text entity.
|
||||
* @param minConfidence The minimal confidence required
|
||||
* @param type Type of the entity.
|
||||
* @param entityType The entity's classification.
|
||||
* @param semanticNode The semantic node related to the NER entity.
|
||||
* @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created.
|
||||
*/
|
||||
public Optional<TextEntity> optionalByNerEntityWithConfidence(NerEntities.NerEntity nerEntity,
|
||||
Double minConfidence,
|
||||
String type,
|
||||
EntityType entityType,
|
||||
SemanticNode semanticNode) {
|
||||
|
||||
if (nerEntity.confidence() != null && nerEntity.confidence() < minConfidence) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines multiple NER entities into a single text entity.
|
||||
*
|
||||
@ -1241,6 +1286,137 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines multiple NER entities into a single text entity based on the specified types and minimum parts to combine.
|
||||
*
|
||||
* @param nerEntities The collection of NER entities to combine.
|
||||
* @param type The type for the combined entity.
|
||||
* @param entityType The classification for the combined entity.
|
||||
* @param semanticNode The semantic node related to these entities.
|
||||
* @param essentialTypes A set of essential types that must be present in the combination.
|
||||
* @param typesToCombine A set of types that should be considered for combination.
|
||||
* @param minPartsToCombine The minimum number of parts that must be combined.
|
||||
* @return A stream of combined {@link TextEntity} objects that match the specified criteria.
|
||||
*/
|
||||
public Stream<TextEntity> combineNerEntities(NerEntities nerEntities,
|
||||
String type,
|
||||
EntityType entityType,
|
||||
SemanticNode semanticNode,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int minPartsToCombine) {
|
||||
|
||||
return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypes(nerEntities, essentialTypes, typesToCombine, minPartsToCombine)
|
||||
.map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, Set.of(Engine.NER)))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines multiple NER entities into a single text entity based on the specified types, maximum distance between parts, and minimum parts to combine.
|
||||
*
|
||||
* @param nerEntities The collection of NER entities to combine.
|
||||
* @param type The type for the combined entity.
|
||||
* @param entityType The classification for the combined entity.
|
||||
* @param semanticNode The semantic node related to these entities.
|
||||
* @param essentialTypes A set of essential types that must be present in the combination.
|
||||
* @param typesToCombine A set of types that should be considered for combination.
|
||||
* @param maxDistanceBetweenParts The maximum distance allowed between parts to consider them for combination.
|
||||
* @param minPartsToCombine The minimum number of parts that must be combined.
|
||||
* @return A stream of combined {@link TextEntity} objects that match the specified criteria.
|
||||
*/
|
||||
public Stream<TextEntity> combineNerEntities(NerEntities nerEntities,
|
||||
String type,
|
||||
EntityType entityType,
|
||||
SemanticNode semanticNode,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine) {
|
||||
|
||||
return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypes(nerEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine)
|
||||
.map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, Set.of(Engine.NER)))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines multiple NER entities into a single text entity based on the specified types, maximum distance between parts, minimum parts to combine, and minimum essential types combined.
|
||||
*
|
||||
* @param nerEntities The collection of NER entities to combine.
|
||||
* @param type The type for the combined entity.
|
||||
* @param entityType The classification for the combined entity.
|
||||
* @param semanticNode The semantic node related to these entities.
|
||||
* @param essentialTypes A set of essential types that must be present in the combination.
|
||||
* @param typesToCombine A set of types that should be considered for combination.
|
||||
* @param maxDistanceBetweenParts The maximum distance allowed between parts to consider them for combination.
|
||||
* @param minPartsToCombine The minimum number of parts that must be combined.
|
||||
* @param minEssentialTypesCombined The minimum number of essential types that must be combined.
|
||||
* @return A stream of combined {@link TextEntity} objects that match the specified criteria.
|
||||
*/
|
||||
public Stream<TextEntity> combineNerEntities(NerEntities nerEntities,
|
||||
String type,
|
||||
EntityType entityType,
|
||||
SemanticNode semanticNode,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined) {
|
||||
|
||||
return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypes(nerEntities,
|
||||
essentialTypes,
|
||||
typesToCombine,
|
||||
maxDistanceBetweenParts,
|
||||
minPartsToCombine,
|
||||
minEssentialTypesCombined)
|
||||
.map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, Set.of(Engine.NER)))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines multiple NER entities into a single text entity based on the specified types, maximum distance between parts, minimum parts to combine, minimum essential types combined, and confidence level.
|
||||
*
|
||||
* @param nerEntities The collection of NER entities to combine.
|
||||
* @param type The type for the combined entity.
|
||||
* @param entityType The classification for the combined entity.
|
||||
* @param semanticNode The semantic node related to these entities.
|
||||
* @param essentialTypes A set of essential types that must be present in the combination.
|
||||
* @param typesToCombine A set of types that should be considered for combination.
|
||||
* @param maxDistanceBetweenParts The maximum distance allowed between parts to consider them for combination.
|
||||
* @param minPartsToCombine The minimum number of parts that must be combined.
|
||||
* @param minEssentialTypesCombined The minimum number of essential types that must be combined.
|
||||
* @param confidence The confidence level required for combining entities.
|
||||
* @return A stream of combined {@link TextEntity} objects that match the specified criteria and confidence level.
|
||||
*/
|
||||
public Stream<TextEntity> combineNerEntitiesWithConfidence(NerEntities nerEntities,
|
||||
String type,
|
||||
EntityType entityType,
|
||||
SemanticNode semanticNode,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined,
|
||||
Double confidence) {
|
||||
|
||||
return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities,
|
||||
essentialTypes,
|
||||
typesToCombine,
|
||||
maxDistanceBetweenParts,
|
||||
minPartsToCombine,
|
||||
minEssentialTypesCombined,
|
||||
confidence)
|
||||
.map(boundary -> byTextRangeWithEngine(boundary, type, entityType, semanticNode, Set.of(Engine.NER)))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Validates if a given text range within a text block represents a valid entity.
|
||||
*
|
||||
|
||||
@ -19,6 +19,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.NonNull;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -35,6 +36,7 @@ public class NerEntitiesAdapter {
|
||||
static int MAX_DISTANCE_BETWEEN_PARTS = 20;
|
||||
static int MIN_PARTS_TO_COMBINE = 3;
|
||||
static boolean ALLOW_DUPLICATES;
|
||||
// Default number of essential-type entities a combination must contain. Left uninitialized this
// would be 0, which makes the essential-type filter accept every cluster and contradicts the
// documented contract that a combination contains at least one essential type.
static int MIN_ESSENTIAL_TYPES_COMBINED = 1;
|
||||
|
||||
|
||||
/**
|
||||
@ -49,7 +51,8 @@ public class NerEntitiesAdapter {
|
||||
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSectionsHeadersFooters(document), nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(
|
||||
nerEntityModel.getValue(),
|
||||
new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
|
||||
nerEntityModel.getType()))
|
||||
nerEntityModel.getType(),
|
||||
nerEntityModel.getConfidence()))
|
||||
.toList());
|
||||
}
|
||||
|
||||
@ -60,12 +63,13 @@ public class NerEntitiesAdapter {
|
||||
* The first part must be an EntityRecognitionEntity of an essential type.
|
||||
* The resulting list must consist of at least minPartsToCombine parts.
|
||||
*
|
||||
* @param nerEntities already validated entities from the NER Service
|
||||
* @param essentialTypes the combined entities must contain at least one of these types
|
||||
* @param typesToCombine all types which should be used to combine, must contain all essentialTypes
|
||||
* @param maxDistanceBetweenParts all parts used to combine should be at most this value apart in string offset coordinates
|
||||
* @param minPartsToCombine minimum number of parts to combine
|
||||
* @param allowDuplicates allow combining multiple parts of same type
|
||||
* @param nerEntities already validated entities from the NER Service
|
||||
* @param essentialTypes the combined entities must contain at least one of these types
|
||||
* @param typesToCombine all types which should be used to combine, must contain all essentialTypes
|
||||
* @param maxDistanceBetweenParts all parts used to combine should be at most this value apart in string offset coordinates
|
||||
* @param minPartsToCombine minimum number of parts to combine
|
||||
* @param minEssentialTypesCombined minimum number of essential types that must be part of a combination
|
||||
* @param allowDuplicates allow combining multiple parts of same type
|
||||
* @return A Stream of the combined boundaries
|
||||
*/
|
||||
public Stream<TextRange> combineNerEntities(NerEntities nerEntities,
|
||||
@ -73,51 +77,107 @@ public class NerEntitiesAdapter {
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined,
|
||||
boolean allowDuplicates) {
|
||||
|
||||
List<NerEntities.NerEntity> sortedEntities = nerEntities.getNerEntityList()
|
||||
.stream()
|
||||
.filter(entity -> typesToCombine.contains(entity.type()))
|
||||
.sorted(Comparator.comparingInt(entity -> entity.textRange().start()))
|
||||
.toList();
|
||||
return getTextRangeStream(nerEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine, minEssentialTypesCombined, allowDuplicates);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@NonNull
|
||||
private static Stream<TextRange> getTextRangeStream(NerEntities nerEntities,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined,
|
||||
boolean allowDuplicates) {
|
||||
|
||||
List<NerEntities.NerEntity> sortedEntities = getSortedEntities(nerEntities, typesToCombine);
|
||||
|
||||
if (sortedEntities.isEmpty()) {
|
||||
return Stream.empty();
|
||||
}
|
||||
|
||||
List<List<NerEntities.NerEntity>> entityClusters = new LinkedList<>();
|
||||
|
||||
List<NerEntities.NerEntity> startEntitiesOfEssentialType = sortedEntities.stream()
|
||||
.filter(e -> essentialTypes.contains(e.type()))
|
||||
.toList();
|
||||
for (NerEntities.NerEntity startEntity : startEntitiesOfEssentialType) {
|
||||
List<NerEntities.NerEntity> currentCluster = new LinkedList<>();
|
||||
entityClusters.add(currentCluster);
|
||||
int lastEndOffset = startEntity.textRange().end();
|
||||
|
||||
for (NerEntities.NerEntity entity : sortedEntities) {
|
||||
if (entity.textRange().start() < lastEndOffset) {
|
||||
continue;
|
||||
}
|
||||
if (distanceIsLargerThanMaxDistance(lastEndOffset, entity, maxDistanceBetweenParts) || isDuplicate(currentCluster, entity, allowDuplicates)) {
|
||||
currentCluster = new LinkedList<>();
|
||||
entityClusters.add(currentCluster);
|
||||
currentCluster.add(entity);
|
||||
lastEndOffset = entity.textRange().end();
|
||||
} else {
|
||||
currentCluster.add(entity);
|
||||
lastEndOffset = entity.textRange().end();
|
||||
}
|
||||
}
|
||||
}
|
||||
List<List<NerEntities.NerEntity>> entityClusters = getEntityClusters(essentialTypes,
|
||||
maxDistanceBetweenParts,
|
||||
minPartsToCombine,
|
||||
minEssentialTypesCombined,
|
||||
allowDuplicates,
|
||||
sortedEntities);
|
||||
|
||||
return entityClusters.stream()
|
||||
.filter(cluster -> cluster.size() >= minPartsToCombine)
|
||||
.map(NerEntitiesAdapter::toContainingBoundary)
|
||||
.distinct();
|
||||
}
|
||||
|
||||
|
||||
private static List<NerEntities.NerEntity> getSortedEntities(NerEntities nerEntities, Set<String> typesToCombine) {
|
||||
|
||||
return nerEntities.getNerEntityList()
|
||||
.stream()
|
||||
.filter(entity -> typesToCombine.contains(entity.type()))
|
||||
.sorted(Comparator.comparingInt(entity -> entity.textRange().start()))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private static List<List<NerEntities.NerEntity>> getEntityClusters(Set<String> essentialTypes,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined,
|
||||
boolean allowDuplicates,
|
||||
List<NerEntities.NerEntity> sortedEntities) {
|
||||
|
||||
List<List<NerEntities.NerEntity>> entityClusters = new LinkedList<>();
|
||||
List<NerEntities.NerEntity> currentCluster = new LinkedList<>();
|
||||
int lastEndOffset = -1;
|
||||
|
||||
for (NerEntities.NerEntity entity : sortedEntities) {
|
||||
|
||||
// cluster can be added as it is either duplicated or distance is too large
|
||||
if (!currentCluster.isEmpty() && (distanceIsLargerThanMaxDistance(lastEndOffset, entity, maxDistanceBetweenParts) || isDuplicate(currentCluster,
|
||||
entity,
|
||||
allowDuplicates))) {
|
||||
entityClusters.add(new LinkedList<>(currentCluster));
|
||||
currentCluster.clear();
|
||||
}
|
||||
|
||||
currentCluster.add(entity);
|
||||
lastEndOffset = entity.textRange().end();
|
||||
}
|
||||
|
||||
// add the last cluster if not empty
|
||||
if (!currentCluster.isEmpty()) {
|
||||
entityClusters.add(currentCluster);
|
||||
}
|
||||
|
||||
return filterClusters(entityClusters, essentialTypes, minPartsToCombine, minEssentialTypesCombined);
|
||||
}
|
||||
|
||||
|
||||
private static List<List<NerEntities.NerEntity>> filterClusters(List<List<NerEntities.NerEntity>> clusters,
|
||||
Set<String> essentialTypes,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined) {
|
||||
|
||||
return clusters.stream()
|
||||
.filter(cluster -> cluster.size() >= minPartsToCombine)
|
||||
.filter(cluster -> countEssentialEntities(cluster, essentialTypes) >= minEssentialTypesCombined)
|
||||
.distinct()
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private static long countEssentialEntities(List<NerEntities.NerEntity> cluster, Set<String> essentialTypes) {
|
||||
|
||||
return cluster.stream()
|
||||
.filter(entity -> essentialTypes.contains(entity.type()))
|
||||
.count();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calls combine NerEntities with the following settings.
|
||||
* <p>
|
||||
@ -137,6 +197,76 @@ public class NerEntitiesAdapter {
|
||||
CBI_ADDRESS_TYPES_TO_COMBINE,
|
||||
MAX_DISTANCE_BETWEEN_PARTS,
|
||||
MIN_PARTS_TO_COMBINE,
|
||||
MIN_ESSENTIAL_TYPES_COMBINED,
|
||||
ALLOW_DUPLICATES);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextRange> combineNerEntitiesOfAllGivenTypes(NerEntities entityRecognitionEntities,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int minPartsToCombine) {
|
||||
|
||||
return combineNerEntities(entityRecognitionEntities,
|
||||
essentialTypes,
|
||||
typesToCombine,
|
||||
MAX_DISTANCE_BETWEEN_PARTS,
|
||||
minPartsToCombine,
|
||||
MIN_ESSENTIAL_TYPES_COMBINED,
|
||||
ALLOW_DUPLICATES);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextRange> combineNerEntitiesOfAllGivenTypes(NerEntities entityRecognitionEntities,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine) {
|
||||
|
||||
return combineNerEntities(entityRecognitionEntities,
|
||||
essentialTypes,
|
||||
typesToCombine,
|
||||
maxDistanceBetweenParts,
|
||||
minPartsToCombine,
|
||||
MIN_ESSENTIAL_TYPES_COMBINED,
|
||||
ALLOW_DUPLICATES);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextRange> combineNerEntitiesOfAllGivenTypes(NerEntities entityRecognitionEntities,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined) {
|
||||
|
||||
return combineNerEntities(entityRecognitionEntities,
|
||||
essentialTypes,
|
||||
typesToCombine,
|
||||
maxDistanceBetweenParts,
|
||||
minPartsToCombine,
|
||||
minEssentialTypesCombined,
|
||||
ALLOW_DUPLICATES);
|
||||
}
|
||||
|
||||
|
||||
public Stream<TextRange> combineNerEntitiesOfAllGivenTypesWithConfidence(NerEntities entityRecognitionEntities,
|
||||
Set<String> essentialTypes,
|
||||
Set<String> typesToCombine,
|
||||
int maxDistanceBetweenParts,
|
||||
int minPartsToCombine,
|
||||
int minEssentialTypesCombined,
|
||||
Double confidence) {
|
||||
|
||||
return combineNerEntities(new NerEntities(entityRecognitionEntities.getNerEntityList()
|
||||
.stream()
|
||||
.filter(nerEntity -> nerEntity.confidence() == null || nerEntity.confidence() >= confidence)
|
||||
.toList()),
|
||||
essentialTypes,
|
||||
typesToCombine,
|
||||
maxDistanceBetweenParts,
|
||||
minPartsToCombine,
|
||||
minEssentialTypesCombined,
|
||||
ALLOW_DUPLICATES);
|
||||
}
|
||||
|
||||
|
||||
@ -153,14 +153,13 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Timed("redactmanager_getImportedLegalBases")
|
||||
public ImportedLegalBases getImportedLegalBases(String dossierId, String fileId) {
|
||||
|
||||
try {
|
||||
return storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_LEGAL_BASES),
|
||||
ImportedLegalBases.class);
|
||||
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_LEGAL_BASES),
|
||||
ImportedLegalBases.class);
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
log.debug("Imported legal bases not available.");
|
||||
return new ImportedLegalBases();
|
||||
@ -295,6 +294,17 @@ public class RedactionStorageService {
|
||||
}
|
||||
|
||||
|
||||
@Timed("redactmanager_getAzureNerEntities")
|
||||
public NerEntitiesModel getAzureNerEntities(String dossierId, String fileId) {
|
||||
|
||||
try {
|
||||
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.AZURE_NER_ENTITIES), NerEntitiesModel.class);
|
||||
} catch (StorageObjectDoesNotExist e) {
|
||||
throw new NotFoundException("NER Entities are not available.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public ComponentLog getComponentLog(String dossierId, String fileId) {
|
||||
|
||||
try {
|
||||
|
||||
@ -6,9 +6,11 @@ import static org.wildfly.common.Assert.assertTrue;
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@ -66,15 +68,38 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
String filePath = "files/new/crafted document.pdf";
|
||||
String nerEntitiesFilePath = "ner_entities/crafted document.NER_ENTITIES.json";
|
||||
Document document = buildGraphNoImages(filePath);
|
||||
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(parseNerEntities(nerEntitiesFilePath), document);
|
||||
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document);
|
||||
getNerEntitiesForFile(new ClassPathResource(filePath), document, nerEntities);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testGetAzureNerEntities() {
|
||||
|
||||
String filePath = "files/new/intertek.ORIGIN.pdf";
|
||||
String nerEntitiesFilePath = "ner_entities/intertek.NER_ENTITIES.json";
|
||||
String azureNerEntitiesFilePath = "ner_entities/intertek.AZURE_NER_ENTITIES.json";
|
||||
Document document = buildGraphNoImages(filePath);
|
||||
NerEntities azureNerEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(azureNerEntitiesFilePath), document);
|
||||
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document);
|
||||
nerEntities.merge(azureNerEntities);
|
||||
getNerEntitiesForFile(new ClassPathResource(filePath), document, nerEntities);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void getNerEntitiesForFile(ClassPathResource resource, Document document, NerEntities nerEntities) throws IOException {
|
||||
|
||||
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(nerEntities, document);
|
||||
assertFalse(entityRecognitionEntities.isEmpty());
|
||||
assertTrue(entityRecognitionEntities.stream()
|
||||
.allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
|
||||
|
||||
ClassPathResource resource = new ClassPathResource(filePath);
|
||||
try (PDDocument pdDocument = Loader.loadPDF(resource.getFile())) {
|
||||
|
||||
Stream<NerEntities.NerEntity> unchangedAddressParts = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document).getNerEntityList()
|
||||
Stream<NerEntities.NerEntity> unchangedAddressParts = nerEntities.getNerEntityList()
|
||||
.stream()
|
||||
.filter(e -> !e.type().equals("CBI_author"));
|
||||
List<TextEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
|
||||
@ -96,7 +121,6 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
File outputFile = new File("/tmp/nerEntities.pdf");
|
||||
pdDocument.save(outputFile);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -150,16 +174,37 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
private List<NerEntities.NerEntity> validateAndCombine(NerEntitiesModel nerEntitiesModel, Document document) {
|
||||
|
||||
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
|
||||
private List<NerEntities.NerEntity> validateAndCombine(NerEntities nerEntities, Document document) {
|
||||
|
||||
List<NerEntities.NerEntity> cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author")
|
||||
.toList();
|
||||
Stream<NerEntities.NerEntity> cbiAddress = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
|
||||
Stream<NerEntities.NerEntity> azureCbiAddress = NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"Quantity",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
|
||||
|
||||
return Stream.concat(cbiAuthors.stream(), cbiAddress)
|
||||
return Stream.concat(cbiAuthors.stream(), Stream.concat(cbiAddress, azureCbiAddress))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -1084,6 +1084,84 @@ rule "AI.1.0: Combine and add NER Entities as CBI_address"
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -1738,6 +1738,84 @@ rule "AI.3.0: Recommend authors from AI as PII"
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -1149,6 +1149,86 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)"
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ AI rules ------------------------------------
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -643,6 +643,84 @@ rule "AI.1.0: Combine and add NER Entities as CBI_address"
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -69,6 +69,86 @@ query "getFileAttributes"
|
||||
$fileAttribute: FileAttribute()
|
||||
end
|
||||
|
||||
//------------------------------------ AI rules ------------------------------------
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -1173,6 +1173,85 @@ rule "AI.1.0: Combine and add NER Entities as CBI_address"
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -108,6 +108,84 @@ rule "AI.0.0: Add all NER Entities of type CBI_author"
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -219,6 +219,86 @@ rule "TAB.7.0: Indicator (Species)"
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ AI rules ------------------------------------
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
@ -119,6 +119,86 @@ rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)"
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ AI rules ------------------------------------
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual changes rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1 @@
|
||||
{"dossierId": "2e41b84e-30ed-4098-b722-ed309a8a5bfb", "fileId": "caf8ba677d05df7a6625449e43c20baf", "targetFileExtension": "SIMPLIFIED_TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz", "X-TENANT-ID": "redaction", "data": {"2.1.1": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 73, "endOffset": 125, "type": "DEPARTMENT"}, {"value": "Head of Alliance Management", "startOffset": 194, "endOffset": 221, "type": "JOB_TITEL"}, {"value": "john.smith@smithcorp.com", "startOffset": 246, "endOffset": 270, "type": "MAIL"}], "2.1.2": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 79, "endOffset": 131, "type": "DEPARTMENT"}, {"value": "Head of Manufacture Tel", "startOffset": 199, "endOffset": 222, "type": "JOB_TITEL"}, {"value": "mimi.lang@smithcorp.com", "startOffset": 243, "endOffset": 266, "type": "MAIL"}], "2.1.3": [{"value": "+44 (0)1252 392460 Email:", "startOffset": 139, "endOffset": 164, "type": "PHONE"}, {"value": "United Kingdom", "startOffset": 338, "endOffset": 352, "type": "COUNTRY"}], "2.1.6.1": [{"value": "EU", "startOffset": 90, "endOffset": 92, "type": "ORG"}, {"value": "EU", "startOffset": 263, "endOffset": 265, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 1280, "endOffset": 1310, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1373, "endOffset": 1387, "type": "ORG"}], "2.1.6.4": [{"value": "Gidley", "startOffset": 66, "endOffset": 72, "type": "CBI_author"}, {"value": "Dentini", "startOffset": 149, "endOffset": 156, "type": "CBI_author"}, {"value": "Lang", "startOffset": 87, "endOffset": 91, "type": "CBI_author"}, {"value": "Kajiwara", "startOffset": 96, "endOffset": 104, "type": "CBI_author"}, {"value": "Kato", "startOffset": 112, "endOffset": 116, "type": "CBI_author"}, {"value": "Lang", "startOffset": 184, "endOffset": 188, "type": "CBI_author"}], "2.1.9": [{"value": "EFSA", "startOffset": 4101, "endOffset": 4105, "type": "ORG"}, 
{"value": "EFSA ANS Panel", "startOffset": 4161, "endOffset": 4175, "type": "ORG"}, {"value": "Ames", "startOffset": 2392, "endOffset": 2396, "type": "NO_AUTHOR"}], "2.1.10.2": [{"value": "EFSA", "startOffset": 942, "endOffset": 946, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1002, "endOffset": 1016, "type": "ORG"}, {"value": "EFSA", "startOffset": 1101, "endOffset": 1105, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1161, "endOffset": 1175, "type": "ORG"}, {"value": "EFSA", "startOffset": 1788, "endOffset": 1792, "type": "ORG"}], "2.1.10.3": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "2.1.10.4": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}, {"value": "EFSA", "startOffset": 2618, "endOffset": 2622, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 2625, "endOffset": 2655, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 3893, "endOffset": 3923, "type": "ORG"}], "2.1.10.5": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "17": [{"value": "2", "startOffset": 18, "endOffset": 19, "type": "CARDINAL"}]}}
|
||||
@ -1734,6 +1734,88 @@ rule "AI.3.0: Recommend authors from AI as PII"
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntity(nerEntity, "PII", EntityType.RECOMMENDATION, document));
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
// Rule unit: AI.4
|
||||
rule "AI.4.0: Add all NER Entities of type Person"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Person"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Person")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.4.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.5
|
||||
rule "AI.5.0: Combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
|
||||
then
|
||||
entityCreationService
|
||||
.combineNerEntitiesWithConfidence(
|
||||
nerEntities,
|
||||
"CBI_address",
|
||||
EntityType.RECOMMENDATION,
|
||||
document,
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"CITY"),
|
||||
Set.of("Organization",
|
||||
"Location",
|
||||
"Address",
|
||||
"ORG",
|
||||
"STREET",
|
||||
"POSTAL",
|
||||
"COUNTRY",
|
||||
"CARDINAL",
|
||||
"CITY",
|
||||
"STATE"),
|
||||
50,
|
||||
3,
|
||||
2,
|
||||
0.7)
|
||||
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
|
||||
end
|
||||
|
||||
|
||||
|
||||
// Rule unit: AI.6
|
||||
rule "AI.6.0: Add all NER Entities of type Location"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Location"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Location")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.6.0", "")));
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: AI.7
|
||||
rule "AI.7.0: Add all NER Entities of type Address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("Address"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("Address")
|
||||
.filter(entity -> entity.value().length() > 3)
|
||||
.filter(entity -> entity.value().length() < 100)
|
||||
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
|
||||
.ifPresent(e -> e.skip("AI.7.0", "")));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Manual redaction rules ------------------------------------
|
||||
|
||||
// Rule unit: MAN.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user