Merge branch 'AZURE_NER_FP' into 'master'

RED-9918: Azure entity recognition (Spike)

See merge request redactmanager/redaction-service!496
This commit is contained in:
Maverick Studer 2024-08-26 14:34:45 +02:00
commit 9d668f9be1
24 changed files with 7326 additions and 60 deletions

View File

@ -4,7 +4,7 @@ plugins {
}
description = "redaction-service-api-v1"
val persistenceServiceVersion = "2.530.0"
val persistenceServiceVersion = "2.531.0"
dependencies {
implementation("org.springframework:spring-web:6.0.12")

View File

@ -16,7 +16,7 @@ val layoutParserVersion = "0.141.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
val persistenceServiceVersion = "2.530.0"
val persistenceServiceVersion = "2.531.0"
val springBootStarterVersion = "3.1.5"
val springCloudVersion = "4.0.4"
val testContainersVersion = "1.19.7"

View File

@ -22,6 +22,8 @@ public class RedactionServiceSettings {
private boolean nerServiceEnabled = true;
private boolean azureNerServiceEnabled;
private boolean priorityMode;
private long dictionaryCacheMaximumSize = 100;

View File

@ -15,5 +15,6 @@ public class EntityRecognitionEntity {
private int startOffset;
private int endOffset;
private String type;
private Double confidence;
}

View File

@ -11,19 +11,15 @@ import lombok.RequiredArgsConstructor;
* This class provides logging functionality specifically for rules execution
* in a Drools context. It is designed to log messages with different log levels
* (INFO, WARN, ERROR) and formats messages using a placeholder-based approach
* similar to popular logging frameworks like SLF4J.
* similar to popular logging frameworks like SLF4J. <p>
*
* <p>
* Log messages can include placeholders (i.e., `{}`), which will be replaced by
* the corresponding arguments when the message is formatted.
* </p>
* <p>
* the corresponding arguments when the message is formatted. <p>
*
* Example usage:
* <pre>
* logger.info("Message with placeholder {}", object);
* </pre>
* </p>
*/
@RequiredArgsConstructor
public class RulesLogger {

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.model;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Stream;
@ -9,6 +10,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
/**
@ -17,7 +19,7 @@ import lombok.experimental.FieldDefaults;
*/
@Getter
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
@FieldDefaults(level = AccessLevel.PRIVATE)
public class NerEntities {
List<NerEntity> nerEntityList;
@ -29,6 +31,14 @@ public class NerEntities {
}
/**
 * Appends all entities of another {@link NerEntities} instance to the entities held by this one.
 * <p>
 * The combined entities are written into a newly created {@link ArrayList}; the list instance
 * that was held before the call is replaced, not modified.
 *
 * @param other the entities to append after the current ones; its own list is only read
 */
public void merge(NerEntities other) {

    List<NerEntity> additional = other.getNerEntityList();
    List<NerEntity> combined = new ArrayList<>(nerEntityList.size() + additional.size());
    combined.addAll(nerEntityList);
    combined.addAll(additional);
    nerEntityList = combined;
}
/**
* Checks if there are any entities of a specified type.
*
@ -55,11 +65,16 @@ public class NerEntities {
}
/**
* Represents a single NER entity with its value, text range, and type.
*/
public record NerEntity(String value, TextRange textRange, String type) {
public record NerEntity(String value, TextRange textRange, String type, Double confidence) {
// Convenience constructor for entities without a confidence score: confidence is set to null.
public NerEntity(String value, TextRange textRange, String type) {
this(value, textRange, type, null);
}
}
}

View File

@ -336,6 +336,11 @@ public class AnalysisPreparationService {
} else {
nerEntities = new NerEntities(Collections.emptyList());
}
if (redactionServiceSettings.isAzureNerServiceEnabled()) {
NerEntitiesModel azureNerEntitiesModel = redactionStorageService.getAzureNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
NerEntities azureNerEntities = NerEntitiesAdapter.toNerEntities(azureNerEntitiesModel, document);
nerEntities.merge(azureNerEntities);
}
return nerEntities;
}
@ -391,6 +396,12 @@ public class AnalysisPreparationService {
} else {
nerEntities = new NerEntities(Collections.emptyList());
}
if (redactionServiceSettings.isAzureNerServiceEnabled()) {
NerEntitiesModel azureNerEntitiesModel = redactionStorageService.getAzureNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
azureNerEntitiesModel = filterNerEntitiesModelBySectionIds(sectionsToReanalyseIds, azureNerEntitiesModel);
NerEntities azureNerEntities = NerEntitiesAdapter.toNerEntities(azureNerEntitiesModel, document);
nerEntities.merge(azureNerEntities);
}
return nerEntities;
}

View File

@ -628,7 +628,9 @@ public class EntityCreationService {
.map(bBox -> Pair.of(bBox.getMaxY(), bBox.getMinY()))
.map(maxMinPair -> tableNode.streamRow(tableCell.getRow())
.filter(nextTableCell -> nextTableCell.getCol() > tableCell.getCol())
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock()))
.map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(),
maxMinPair.getRight(),
nextTableCell.getTextBlock()))
.map(b -> b.trim(tableNode.getTextBlock()))
.filter(boundary -> isValidEntityTextRange(tableNode.getTextBlock(), boundary))
.map(boundary -> byTextRange(boundary, type, entityType, tableNode))
@ -1223,6 +1225,49 @@ public class EntityCreationService {
}
/**
 * Optionally creates a text entity from a Named Entity Recognition (NER) entity, using the NER entity's own type,
 * unless the confidence of the NER entity is lower than the given minimal confidence.
 * <p>
 * NER entities without a confidence value ({@code null}) are never rejected by the confidence check,
 * and a confidence equal to {@code minConfidence} is accepted.
 *
 * @param nerEntity     The NER entity used for creating the text entity.
 * @param minConfidence The minimal confidence required.
 * @param entityType    The entity's classification.
 * @param semanticNode  The semantic node related to the NER entity.
 * @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created.
 */
public Optional<TextEntity> optionalByNerEntityWithConfidence(NerEntities.NerEntity nerEntity, Double minConfidence, EntityType entityType, SemanticNode semanticNode) {

    // Same confidence check and creation path as the overload with an explicit type, applied to the NER entity's own type.
    return optionalByNerEntityWithConfidence(nerEntity, minConfidence, nerEntity.type(), entityType, semanticNode);
}
/**
 * Optionally creates a text entity of the given type from a Named Entity Recognition (NER) entity,
 * unless the confidence of the NER entity is lower than the given minimal confidence.
 * <p>
 * NER entities without a confidence value ({@code null}) are never rejected by the confidence check,
 * and a confidence equal to {@code minConfidence} is accepted.
 *
 * @param nerEntity     The NER entity used for creating the text entity.
 * @param minConfidence The minimal confidence required.
 * @param type          Type of the entity.
 * @param entityType    The entity's classification.
 * @param semanticNode  The semantic node related to the NER entity.
 * @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created.
 */
public Optional<TextEntity> optionalByNerEntityWithConfidence(NerEntities.NerEntity nerEntity,
                                                              Double minConfidence,
                                                              String type,
                                                              EntityType entityType,
                                                              SemanticNode semanticNode) {

    boolean belowMinConfidence = nerEntity.confidence() != null && nerEntity.confidence() < minConfidence;
    return belowMinConfidence //
            ? Optional.empty() //
            : byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER));
}
/**
* Combines multiple NER entities into a single text entity.
*
@ -1241,6 +1286,137 @@ public class EntityCreationService {
}
/**
 * Combines NER entities of the given types into text entities, requiring a minimum number of combined parts.
 * <p>
 * The text ranges to combine are computed by {@link NerEntitiesAdapter}; ranges for which no text entity
 * could be created are dropped from the result.
 *
 * @param nerEntities       The collection of NER entities to combine.
 * @param type              The type for the combined entity.
 * @param entityType        The classification for the combined entity.
 * @param semanticNode      The semantic node related to these entities.
 * @param essentialTypes    A set of essential types that must be present in the combination.
 * @param typesToCombine    A set of types that should be considered for combination.
 * @param minPartsToCombine The minimum number of parts that must be combined.
 * @return A stream of combined {@link TextEntity} objects that match the specified criteria.
 */
public Stream<TextEntity> combineNerEntities(NerEntities nerEntities,
                                             String type,
                                             EntityType entityType,
                                             SemanticNode semanticNode,
                                             Set<String> essentialTypes,
                                             Set<String> typesToCombine,
                                             int minPartsToCombine) {

    return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypes(nerEntities, essentialTypes, typesToCombine, minPartsToCombine)
            .map(combinedRange -> byTextRangeWithEngine(combinedRange, type, entityType, semanticNode, Set.of(Engine.NER)))
            .flatMap(Optional::stream);
}
/**
 * Combines NER entities of the given types into text entities, limiting the distance between parts
 * and requiring a minimum number of combined parts.
 * <p>
 * The text ranges to combine are computed by {@link NerEntitiesAdapter}; ranges for which no text entity
 * could be created are dropped from the result.
 *
 * @param nerEntities             The collection of NER entities to combine.
 * @param type                    The type for the combined entity.
 * @param entityType              The classification for the combined entity.
 * @param semanticNode            The semantic node related to these entities.
 * @param essentialTypes          A set of essential types that must be present in the combination.
 * @param typesToCombine          A set of types that should be considered for combination.
 * @param maxDistanceBetweenParts The maximum distance allowed between parts to consider them for combination.
 * @param minPartsToCombine       The minimum number of parts that must be combined.
 * @return A stream of combined {@link TextEntity} objects that match the specified criteria.
 */
public Stream<TextEntity> combineNerEntities(NerEntities nerEntities,
                                             String type,
                                             EntityType entityType,
                                             SemanticNode semanticNode,
                                             Set<String> essentialTypes,
                                             Set<String> typesToCombine,
                                             int maxDistanceBetweenParts,
                                             int minPartsToCombine) {

    return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypes(nerEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine)
            .map(combinedRange -> byTextRangeWithEngine(combinedRange, type, entityType, semanticNode, Set.of(Engine.NER)))
            .flatMap(Optional::stream);
}
/**
 * Combines NER entities of the given types into text entities, limiting the distance between parts and
 * requiring both a minimum number of combined parts and a minimum number of essential-type parts.
 * <p>
 * The text ranges to combine are computed by {@link NerEntitiesAdapter}; ranges for which no text entity
 * could be created are dropped from the result.
 *
 * @param nerEntities               The collection of NER entities to combine.
 * @param type                      The type for the combined entity.
 * @param entityType                The classification for the combined entity.
 * @param semanticNode              The semantic node related to these entities.
 * @param essentialTypes            A set of essential types that must be present in the combination.
 * @param typesToCombine            A set of types that should be considered for combination.
 * @param maxDistanceBetweenParts   The maximum distance allowed between parts to consider them for combination.
 * @param minPartsToCombine         The minimum number of parts that must be combined.
 * @param minEssentialTypesCombined The minimum number of essential types that must be combined.
 * @return A stream of combined {@link TextEntity} objects that match the specified criteria.
 */
public Stream<TextEntity> combineNerEntities(NerEntities nerEntities,
                                             String type,
                                             EntityType entityType,
                                             SemanticNode semanticNode,
                                             Set<String> essentialTypes,
                                             Set<String> typesToCombine,
                                             int maxDistanceBetweenParts,
                                             int minPartsToCombine,
                                             int minEssentialTypesCombined) {

    return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypes(nerEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine, minEssentialTypesCombined)
            .map(combinedRange -> byTextRangeWithEngine(combinedRange, type, entityType, semanticNode, Set.of(Engine.NER)))
            .flatMap(Optional::stream);
}
/**
 * Combines NER entities of the given types into text entities, limiting the distance between parts,
 * requiring a minimum number of combined parts and of essential-type parts, and applying a confidence level.
 * <p>
 * The text ranges to combine are computed by {@link NerEntitiesAdapter}; ranges for which no text entity
 * could be created are dropped from the result.
 *
 * @param nerEntities               The collection of NER entities to combine.
 * @param type                      The type for the combined entity.
 * @param entityType                The classification for the combined entity.
 * @param semanticNode              The semantic node related to these entities.
 * @param essentialTypes            A set of essential types that must be present in the combination.
 * @param typesToCombine            A set of types that should be considered for combination.
 * @param maxDistanceBetweenParts   The maximum distance allowed between parts to consider them for combination.
 * @param minPartsToCombine         The minimum number of parts that must be combined.
 * @param minEssentialTypesCombined The minimum number of essential types that must be combined.
 * @param confidence                The confidence level required for combining entities.
 * @return A stream of combined {@link TextEntity} objects that match the specified criteria and confidence level.
 */
public Stream<TextEntity> combineNerEntitiesWithConfidence(NerEntities nerEntities,
                                                           String type,
                                                           EntityType entityType,
                                                           SemanticNode semanticNode,
                                                           Set<String> essentialTypes,
                                                           Set<String> typesToCombine,
                                                           int maxDistanceBetweenParts,
                                                           int minPartsToCombine,
                                                           int minEssentialTypesCombined,
                                                           Double confidence) {

    return NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine, minEssentialTypesCombined, confidence)
            .map(combinedRange -> byTextRangeWithEngine(combinedRange, type, entityType, semanticNode, Set.of(Engine.NER)))
            .flatMap(Optional::stream);
}
/**
* Validates if a given text range within a text block represents a valid entity.
*

View File

@ -19,6 +19,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import lombok.AccessLevel;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
@ -35,6 +36,7 @@ public class NerEntitiesAdapter {
static int MAX_DISTANCE_BETWEEN_PARTS = 20;
static int MIN_PARTS_TO_COMBINE = 3;
static boolean ALLOW_DUPLICATES;
static int MIN_ESSENTIAL_TYPES_COMBINED;
/**
@ -49,7 +51,8 @@ public class NerEntitiesAdapter {
return new NerEntities(addOffsetsAndFlatten(getStringStartOffsetsForMainSectionsHeadersFooters(document), nerEntitiesModel).map(nerEntityModel -> new NerEntities.NerEntity(
nerEntityModel.getValue(),
new TextRange(nerEntityModel.getStartOffset(), nerEntityModel.getEndOffset()),
nerEntityModel.getType()))
nerEntityModel.getType(),
nerEntityModel.getConfidence()))
.toList());
}
@ -60,12 +63,13 @@ public class NerEntitiesAdapter {
* The first part must be an EntityRecognitionEntity of an essential type.
* The resulting list must consist of at least minPartsToCombine parts.
*
* @param nerEntities already validated entities from the NER Service
* @param essentialTypes the combined entities must contain at least one of these types
* @param typesToCombine all types which should be used to combine, must contain all essentialTypes
* @param maxDistanceBetweenParts all parts used to combine should be at most this value apart in string offset coordinates
* @param minPartsToCombine minimum number of parts to combine
* @param allowDuplicates allow combining multiple parts of same type
* @param nerEntities already validated entities from the NER Service
* @param essentialTypes the combined entities must contain at least one of these types
* @param typesToCombine all types which should be used to combine, must contain all essentialTypes
* @param maxDistanceBetweenParts all parts used to combine should be at most this value apart in string offset coordinates
* @param minPartsToCombine minimum number of parts to combine
* @param minEssentialTypesCombined minimum number of essential types that must be part of a combination
* @param allowDuplicates allow combining multiple parts of same type
* @return A Stream of the combined boundaries
*/
public Stream<TextRange> combineNerEntities(NerEntities nerEntities,
@ -73,51 +77,107 @@ public class NerEntitiesAdapter {
Set<String> typesToCombine,
int maxDistanceBetweenParts,
int minPartsToCombine,
int minEssentialTypesCombined,
boolean allowDuplicates) {
List<NerEntities.NerEntity> sortedEntities = nerEntities.getNerEntityList()
.stream()
.filter(entity -> typesToCombine.contains(entity.type()))
.sorted(Comparator.comparingInt(entity -> entity.textRange().start()))
.toList();
return getTextRangeStream(nerEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine, minEssentialTypesCombined, allowDuplicates);
}
@NonNull
private static Stream<TextRange> getTextRangeStream(NerEntities nerEntities,
Set<String> essentialTypes,
Set<String> typesToCombine,
int maxDistanceBetweenParts,
int minPartsToCombine,
int minEssentialTypesCombined,
boolean allowDuplicates) {
List<NerEntities.NerEntity> sortedEntities = getSortedEntities(nerEntities, typesToCombine);
if (sortedEntities.isEmpty()) {
return Stream.empty();
}
List<List<NerEntities.NerEntity>> entityClusters = new LinkedList<>();
List<NerEntities.NerEntity> startEntitiesOfEssentialType = sortedEntities.stream()
.filter(e -> essentialTypes.contains(e.type()))
.toList();
for (NerEntities.NerEntity startEntity : startEntitiesOfEssentialType) {
List<NerEntities.NerEntity> currentCluster = new LinkedList<>();
entityClusters.add(currentCluster);
int lastEndOffset = startEntity.textRange().end();
for (NerEntities.NerEntity entity : sortedEntities) {
if (entity.textRange().start() < lastEndOffset) {
continue;
}
if (distanceIsLargerThanMaxDistance(lastEndOffset, entity, maxDistanceBetweenParts) || isDuplicate(currentCluster, entity, allowDuplicates)) {
currentCluster = new LinkedList<>();
entityClusters.add(currentCluster);
currentCluster.add(entity);
lastEndOffset = entity.textRange().end();
} else {
currentCluster.add(entity);
lastEndOffset = entity.textRange().end();
}
}
}
List<List<NerEntities.NerEntity>> entityClusters = getEntityClusters(essentialTypes,
maxDistanceBetweenParts,
minPartsToCombine,
minEssentialTypesCombined,
allowDuplicates,
sortedEntities);
return entityClusters.stream()
.filter(cluster -> cluster.size() >= minPartsToCombine)
.map(NerEntitiesAdapter::toContainingBoundary)
.distinct();
}
/**
 * Selects the entities whose type is contained in {@code typesToCombine} and
 * orders them by the start offset of their text range, ascending.
 *
 * @param nerEntities    the entities to select from
 * @param typesToCombine the types to keep
 * @return the selected entities, sorted by start offset
 */
private static List<NerEntities.NerEntity> getSortedEntities(NerEntities nerEntities, Set<String> typesToCombine) {

    Comparator<NerEntities.NerEntity> byStartOffset = Comparator.comparingInt(candidate -> candidate.textRange().start());
    return nerEntities.getNerEntityList()
            .stream()
            .filter(candidate -> typesToCombine.contains(candidate.type()))
            .sorted(byStartOffset)
            .toList();
}
/**
 * Splits the given entities, in the order received, into consecutive clusters and returns the clusters
 * that pass {@code filterClusters}.
 * <p>
 * A new cluster is started whenever the current cluster is non-empty and either the distance check against
 * the end offset of the previously added entity fails or the entity is reported as a duplicate for the current cluster.
 *
 * @param essentialTypes            types counted as essential when filtering clusters
 * @param maxDistanceBetweenParts   passed to the distance check between the previous entity's end and the next entity
 * @param minPartsToCombine         minimum cluster size, applied by the filter
 * @param minEssentialTypesCombined minimum number of essential-type entities per cluster, applied by the filter
 * @param allowDuplicates           passed to the duplicate check
 * @param sortedEntities            the entities to cluster
 * @return the clusters remaining after filtering
 */
private static List<List<NerEntities.NerEntity>> getEntityClusters(Set<String> essentialTypes,
                                                                   int maxDistanceBetweenParts,
                                                                   int minPartsToCombine,
                                                                   int minEssentialTypesCombined,
                                                                   boolean allowDuplicates,
                                                                   List<NerEntities.NerEntity> sortedEntities) {

    List<List<NerEntities.NerEntity>> clusters = new LinkedList<>();
    List<NerEntities.NerEntity> openCluster = new LinkedList<>();
    int previousEnd = -1;

    for (NerEntities.NerEntity candidate : sortedEntities) {
        // the open cluster is complete once the candidate is too far away or would be a duplicate in it
        boolean startsNewCluster = !openCluster.isEmpty() //
                && (distanceIsLargerThanMaxDistance(previousEnd, candidate, maxDistanceBetweenParts) || isDuplicate(openCluster, candidate, allowDuplicates));
        if (startsNewCluster) {
            clusters.add(openCluster);
            openCluster = new LinkedList<>();
        }
        openCluster.add(candidate);
        previousEnd = candidate.textRange().end();
    }

    // the cluster still open after the last entity has not been added yet
    if (!openCluster.isEmpty()) {
        clusters.add(openCluster);
    }

    return filterClusters(clusters, essentialTypes, minPartsToCombine, minEssentialTypesCombined);
}
/**
 * Keeps the clusters that contain at least {@code minPartsToCombine} entities and at least
 * {@code minEssentialTypesCombined} entities of an essential type, and removes equal clusters.
 *
 * @param clusters                  the clusters to filter
 * @param essentialTypes            types counted as essential
 * @param minPartsToCombine         minimum number of entities per cluster
 * @param minEssentialTypesCombined minimum number of essential-type entities per cluster
 * @return the distinct clusters fulfilling both minimums
 */
private static List<List<NerEntities.NerEntity>> filterClusters(List<List<NerEntities.NerEntity>> clusters,
                                                                Set<String> essentialTypes,
                                                                int minPartsToCombine,
                                                                int minEssentialTypesCombined) {

    return clusters.stream()
            .filter(candidate -> candidate.size() >= minPartsToCombine && countEssentialEntities(candidate, essentialTypes) >= minEssentialTypesCombined)
            .distinct()
            .toList();
}
/**
 * Counts the entities of a cluster whose type is contained in {@code essentialTypes}.
 *
 * @param cluster        the entities to inspect
 * @param essentialTypes types counted as essential
 * @return the number of entities with an essential type
 */
private static long countEssentialEntities(List<NerEntities.NerEntity> cluster, Set<String> essentialTypes) {

    long essentialCount = 0;
    for (NerEntities.NerEntity member : cluster) {
        if (essentialTypes.contains(member.type())) {
            essentialCount++;
        }
    }
    return essentialCount;
}
/**
* Calls combine NerEntities with the following settings.
* <p>
@ -137,6 +197,76 @@ public class NerEntitiesAdapter {
CBI_ADDRESS_TYPES_TO_COMBINE,
MAX_DISTANCE_BETWEEN_PARTS,
MIN_PARTS_TO_COMBINE,
MIN_ESSENTIAL_TYPES_COMBINED,
ALLOW_DUPLICATES);
}
/**
 * Combines NER entities using the adapter defaults for the maximum distance between parts
 * ({@code MAX_DISTANCE_BETWEEN_PARTS}), the minimum number of essential types and the duplicate handling.
 *
 * @param entityRecognitionEntities the entities to combine
 * @param essentialTypes            the essential types
 * @param typesToCombine            all types which should be used to combine
 * @param minPartsToCombine         minimum number of parts to combine
 * @return A Stream of the combined boundaries
 */
public Stream<TextRange> combineNerEntitiesOfAllGivenTypes(NerEntities entityRecognitionEntities,
                                                           Set<String> essentialTypes,
                                                           Set<String> typesToCombine,
                                                           int minPartsToCombine) {

    // the overload with an explicit distance fills in the same remaining defaults
    return combineNerEntitiesOfAllGivenTypes(entityRecognitionEntities, essentialTypes, typesToCombine, MAX_DISTANCE_BETWEEN_PARTS, minPartsToCombine);
}
/**
 * Combines NER entities with an explicit maximum distance between parts, using the adapter defaults
 * for the minimum number of essential types ({@code MIN_ESSENTIAL_TYPES_COMBINED}) and the duplicate handling.
 *
 * @param entityRecognitionEntities the entities to combine
 * @param essentialTypes            the essential types
 * @param typesToCombine            all types which should be used to combine
 * @param maxDistanceBetweenParts   maximum distance between parts in string offset coordinates
 * @param minPartsToCombine         minimum number of parts to combine
 * @return A Stream of the combined boundaries
 */
public Stream<TextRange> combineNerEntitiesOfAllGivenTypes(NerEntities entityRecognitionEntities,
                                                           Set<String> essentialTypes,
                                                           Set<String> typesToCombine,
                                                           int maxDistanceBetweenParts,
                                                           int minPartsToCombine) {

    // the overload with an explicit essential-type minimum fills in the same duplicate-handling default
    return combineNerEntitiesOfAllGivenTypes(entityRecognitionEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine, MIN_ESSENTIAL_TYPES_COMBINED);
}
/**
 * Combines NER entities with an explicit maximum distance between parts, minimum number of parts and
 * minimum number of essential types, using the adapter default for the duplicate handling ({@code ALLOW_DUPLICATES}).
 *
 * @param entityRecognitionEntities the entities to combine
 * @param essentialTypes            the essential types
 * @param typesToCombine            all types which should be used to combine
 * @param maxDistanceBetweenParts   maximum distance between parts in string offset coordinates
 * @param minPartsToCombine         minimum number of parts to combine
 * @param minEssentialTypesCombined minimum number of essential types that must be part of a combination
 * @return A Stream of the combined boundaries
 */
public Stream<TextRange> combineNerEntitiesOfAllGivenTypes(NerEntities entityRecognitionEntities,
                                                           Set<String> essentialTypes,
                                                           Set<String> typesToCombine,
                                                           int maxDistanceBetweenParts,
                                                           int minPartsToCombine,
                                                           int minEssentialTypesCombined) {

    return combineNerEntities(entityRecognitionEntities, essentialTypes, typesToCombine, maxDistanceBetweenParts, minPartsToCombine, minEssentialTypesCombined, ALLOW_DUPLICATES);
}
/**
 * Combines NER entities after removing those whose confidence is lower than the given confidence.
 * Entities without a confidence value ({@code null}) are kept. The adapter default is used for the duplicate handling.
 *
 * @param entityRecognitionEntities the entities to filter and combine
 * @param essentialTypes            the essential types
 * @param typesToCombine            all types which should be used to combine
 * @param maxDistanceBetweenParts   maximum distance between parts in string offset coordinates
 * @param minPartsToCombine         minimum number of parts to combine
 * @param minEssentialTypesCombined minimum number of essential types that must be part of a combination
 * @param confidence                the confidence an entity must reach to take part in the combination
 * @return A Stream of the combined boundaries
 */
public Stream<TextRange> combineNerEntitiesOfAllGivenTypesWithConfidence(NerEntities entityRecognitionEntities,
                                                                         Set<String> essentialTypes,
                                                                         Set<String> typesToCombine,
                                                                         int maxDistanceBetweenParts,
                                                                         int minPartsToCombine,
                                                                         int minEssentialTypesCombined,
                                                                         Double confidence) {

    List<NerEntities.NerEntity> confidentEntities = entityRecognitionEntities.getNerEntityList()
            .stream()
            .filter(candidate -> candidate.confidence() == null || candidate.confidence() >= confidence)
            .toList();

    return combineNerEntities(new NerEntities(confidentEntities),
                              essentialTypes,
                              typesToCombine,
                              maxDistanceBetweenParts,
                              minPartsToCombine,
                              minEssentialTypesCombined,
                              ALLOW_DUPLICATES);
}

View File

@ -153,14 +153,13 @@ public class RedactionStorageService {
}
@Timed("redactmanager_getImportedLegalBases")
public ImportedLegalBases getImportedLegalBases(String dossierId, String fileId) {
try {
return storageService.readJSONObject(TenantContext.getTenantId(),
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_LEGAL_BASES),
ImportedLegalBases.class);
StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMPORTED_LEGAL_BASES),
ImportedLegalBases.class);
} catch (StorageObjectDoesNotExist e) {
log.debug("Imported legal bases not available.");
return new ImportedLegalBases();
@ -295,6 +294,17 @@ public class RedactionStorageService {
}
/**
 * Reads the Azure NER entities stored for a file.
 * <p>
 * The object is looked up under the current tenant with the storage id built from the dossier id,
 * the file id and {@code FileType.AZURE_NER_ENTITIES}.
 *
 * @param dossierId the id of the dossier the file belongs to
 * @param fileId    the id of the file
 * @return the stored entities, deserialized into a {@link NerEntitiesModel}
 * @throws NotFoundException if the storage reports that the object does not exist
 */
@Timed("redactmanager_getAzureNerEntities")
public NerEntitiesModel getAzureNerEntities(String dossierId, String fileId) {

    try {
        return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.AZURE_NER_ENTITIES), NerEntitiesModel.class);
    } catch (StorageObjectDoesNotExist e) {
        // name the Azure object explicitly so the failure is not mistaken for a missing regular NER result
        throw new NotFoundException("Azure NER Entities are not available.");
    }
}
public ComponentLog getComponentLog(String dossierId, String fileId) {
try {

View File

@ -6,9 +6,11 @@ import static org.wildfly.common.Assert.assertTrue;
import java.awt.Color;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -66,15 +68,38 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
String filePath = "files/new/crafted document.pdf";
String nerEntitiesFilePath = "ner_entities/crafted document.NER_ENTITIES.json";
Document document = buildGraphNoImages(filePath);
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(parseNerEntities(nerEntitiesFilePath), document);
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document);
getNerEntitiesForFile(new ClassPathResource(filePath), document, nerEntities);
}
@Test
@SneakyThrows
public void testGetAzureNerEntities() {

    // Both NER results of the same PDF are converted against one document graph; the Azure entities
    // are then merged into the regular ones before the shared checks run.
    String pdfPath = "files/new/intertek.ORIGIN.pdf";
    Document document = buildGraphNoImages(pdfPath);

    NerEntities azureEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities("ner_entities/intertek.AZURE_NER_ENTITIES.json"), document);
    NerEntities mergedEntities = NerEntitiesAdapter.toNerEntities(parseNerEntities("ner_entities/intertek.NER_ENTITIES.json"), document);
    mergedEntities.merge(azureEntities);

    getNerEntitiesForFile(new ClassPathResource(pdfPath), document, mergedEntities);
}
private void getNerEntitiesForFile(ClassPathResource resource, Document document, NerEntities nerEntities) throws IOException {
List<NerEntities.NerEntity> entityRecognitionEntities = validateAndCombine(nerEntities, document);
assertFalse(entityRecognitionEntities.isEmpty());
assertTrue(entityRecognitionEntities.stream()
.allMatch(entity -> entity.textRange().start() < entity.textRange().end()));
ClassPathResource resource = new ClassPathResource(filePath);
try (PDDocument pdDocument = Loader.loadPDF(resource.getFile())) {
Stream<NerEntities.NerEntity> unchangedAddressParts = NerEntitiesAdapter.toNerEntities(parseNerEntities(nerEntitiesFilePath), document).getNerEntityList()
Stream<NerEntities.NerEntity> unchangedAddressParts = nerEntities.getNerEntityList()
.stream()
.filter(e -> !e.type().equals("CBI_author"));
List<TextEntity> redactionEntities = Stream.concat(entityRecognitionEntities.stream(), unchangedAddressParts)
@ -96,7 +121,6 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
File outputFile = new File("/tmp/nerEntities.pdf");
pdDocument.save(outputFile);
}
}
@ -150,16 +174,37 @@ class NerEntitiesAdapterTest extends BuildDocumentIntegrationTest {
}
private List<NerEntities.NerEntity> validateAndCombine(NerEntitiesModel nerEntitiesModel, Document document) {
NerEntities nerEntities = NerEntitiesAdapter.toNerEntities(nerEntitiesModel, document);
private List<NerEntities.NerEntity> validateAndCombine(NerEntities nerEntities, Document document) {
List<NerEntities.NerEntity> cbiAuthors = nerEntities.streamEntitiesOfType("CBI_author")
.toList();
Stream<NerEntities.NerEntity> cbiAddress = NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
Stream<NerEntities.NerEntity> azureCbiAddress = NerEntitiesAdapter.combineNerEntitiesOfAllGivenTypesWithConfidence(nerEntities,
Set.of("Organization",
"Location",
"Address",
"ORG",
"STREET",
"CITY"),
Set.of("Organization",
"Location",
"Address",
"Quantity",
"ORG",
"STREET",
"POSTAL",
"COUNTRY",
"CARDINAL",
"CITY",
"STATE"),
50,
3,
2,
0.7)
.map(boundary -> new NerEntities.NerEntity(document.getTextBlock().subSequence(boundary).toString(), boundary, "CBI_address"));
return Stream.concat(cbiAuthors.stream(), cbiAddress)
return Stream.concat(cbiAuthors.stream(), Stream.concat(cbiAddress, azureCbiAddress))
.toList();
}

View File

@ -1084,6 +1084,84 @@ rule "AI.1.0: Combine and add NER Entities as CBI_address"
end
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Person"))
then
// For every "Person" NER entity whose value is 4 to 99 characters long, try to create a
// "CBI_author" RECOMMENDATION entity on the document with a minimal confidence of 0.7.
// Every entity that was created then gets skip("AI.4.0", "") called on it (empty second argument).
nerEntities.streamEntitiesOfType("Person")
.filter(entity -> entity.value().length() > 3)
.filter(entity -> entity.value().length() < 100)
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
.ifPresent(e -> e.skip("AI.4.0", "")));
end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
then
// Combines NER entities into "CBI_address" RECOMMENDATION entities on the document.
// First Set: essential types. Second Set: all types that may take part in a combination.
// Trailing arguments, matching the parameter order of combineNerEntitiesWithConfidence:
// 50 = maxDistanceBetweenParts, 3 = minPartsToCombine, 2 = minEssentialTypesCombined, 0.7 = confidence.
// NOTE(review): the condition above only checks Location/Address/Organization, while the sets
// also list ORG, STREET, CITY etc. - confirm this is intended.
// Every combined entity then gets skip("AI.5.0", "") called on it (empty second argument).
entityCreationService
.combineNerEntitiesWithConfidence(
nerEntities,
"CBI_address",
EntityType.RECOMMENDATION,
document,
Set.of("Organization",
"Location",
"Address",
"ORG",
"STREET",
"CITY"),
Set.of("Organization",
"Location",
"Address",
"ORG",
"STREET",
"POSTAL",
"COUNTRY",
"CARDINAL",
"CITY",
"STATE"),
50,
3,
2,
0.7)
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Location"))
then
// For every "Location" NER entity whose value is 4 to 99 characters long, try to create a
// "CBI_address" RECOMMENDATION entity on the document with a minimal confidence of 0.7.
// Every entity that was created then gets skip("AI.6.0", "") called on it (empty second argument).
nerEntities.streamEntitiesOfType("Location")
.filter(entity -> entity.value().length() > 3)
.filter(entity -> entity.value().length() < 100)
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
.ifPresent(e -> e.skip("AI.6.0", "")));
end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Address"))
then
// For every "Address" NER entity whose value is 4 to 99 characters long, try to create a
// "CBI_address" RECOMMENDATION entity on the document with a minimal confidence of 0.7.
// Every entity that was created then gets skip("AI.7.0", "") called on it (empty second argument).
nerEntities.streamEntitiesOfType("Address")
.filter(entity -> entity.value().length() > 3)
.filter(entity -> entity.value().length() < 100)
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
.ifPresent(e -> e.skip("AI.7.0", "")));
end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -1738,6 +1738,84 @@ rule "AI.3.0: Recommend authors from AI as PII"
end
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Person"))
then
// For every "Person" NER entity whose value is 4 to 99 characters long, try to create a
// "CBI_author" RECOMMENDATION entity on the document with a minimal confidence of 0.7.
// Every entity that was created then gets skip("AI.4.0", "") called on it (empty second argument).
nerEntities.streamEntitiesOfType("Person")
.filter(entity -> entity.value().length() > 3)
.filter(entity -> entity.value().length() < 100)
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
.ifPresent(e -> e.skip("AI.4.0", "")));
end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address")|| hasEntitiesOfType("Organization"))
then
// Combines NER entities into "CBI_address" RECOMMENDATION entities on the document.
// First Set: essential types. Second Set: all types that may take part in a combination.
// Trailing arguments, matching the parameter order of combineNerEntitiesWithConfidence:
// 50 = maxDistanceBetweenParts, 3 = minPartsToCombine, 2 = minEssentialTypesCombined, 0.7 = confidence.
// NOTE(review): the condition above only checks Location/Address/Organization, while the sets
// also list ORG, STREET, CITY etc. - confirm this is intended.
// Every combined entity then gets skip("AI.5.0", "") called on it (empty second argument).
entityCreationService
.combineNerEntitiesWithConfidence(
nerEntities,
"CBI_address",
EntityType.RECOMMENDATION,
document,
Set.of("Organization",
"Location",
"Address",
"ORG",
"STREET",
"CITY"),
Set.of("Organization",
"Location",
"Address",
"ORG",
"STREET",
"POSTAL",
"COUNTRY",
"CARDINAL",
"CITY",
"STATE"),
50,
3,
2,
0.7)
.forEach(nerEntity -> nerEntity.skip("AI.5.0", ""));
end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Location"))
then
// For every "Location" NER entity whose value is 4 to 99 characters long, try to create a
// "CBI_address" RECOMMENDATION entity on the document with a minimal confidence of 0.7.
// Every entity that was created then gets skip("AI.6.0", "") called on it (empty second argument).
nerEntities.streamEntitiesOfType("Location")
.filter(entity -> entity.value().length() > 3)
.filter(entity -> entity.value().length() < 100)
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
.ifPresent(e -> e.skip("AI.6.0", "")));
end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("Address"))
then
// For every "Address" NER entity whose value is 4 to 99 characters long, try to create a
// "CBI_address" RECOMMENDATION entity on the document with a minimal confidence of 0.7.
// Every entity that was created then gets skip("AI.7.0", "") called on it (empty second argument).
nerEntities.streamEntitiesOfType("Address")
.filter(entity -> entity.value().length() > 3)
.filter(entity -> entity.value().length() < 100)
.forEach(nerEntity -> entityCreationService.optionalByNerEntityWithConfidence(nerEntity, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
.ifPresent(e -> e.skip("AI.7.0", "")));
end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -1149,6 +1149,86 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)"
end
//------------------------------------ AI rules ------------------------------------
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -643,6 +643,84 @@ rule "AI.1.0: Combine and add NER Entities as CBI_address"
end
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -69,6 +69,86 @@ query "getFileAttributes"
$fileAttribute: FileAttribute()
end
//------------------------------------ AI rules ------------------------------------
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -1173,6 +1173,85 @@ rule "AI.1.0: Combine and add NER Entities as CBI_address"
end
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -108,6 +108,84 @@ rule "AI.0.0: Add all NER Entities of type CBI_author"
end
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -219,6 +219,86 @@ rule "TAB.7.0: Indicator (Species)"
end
//------------------------------------ AI rules ------------------------------------
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -119,6 +119,86 @@ rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)"
end
//------------------------------------ AI rules ------------------------------------
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0

View File

@ -0,0 +1 @@
{"dossierId": "2e41b84e-30ed-4098-b722-ed309a8a5bfb", "fileId": "caf8ba677d05df7a6625449e43c20baf", "targetFileExtension": "SIMPLIFIED_TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz", "X-TENANT-ID": "redaction", "data": {"2.1.1": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 73, "endOffset": 125, "type": "DEPARTMENT"}, {"value": "Head of Alliance Management", "startOffset": 194, "endOffset": 221, "type": "JOB_TITEL"}, {"value": "john.smith@smithcorp.com", "startOffset": 246, "endOffset": 270, "type": "MAIL"}], "2.1.2": [{"value": "Corporation Buckingham Palace Buckingham Palace Road", "startOffset": 79, "endOffset": 131, "type": "DEPARTMENT"}, {"value": "Head of Manufacture Tel", "startOffset": 199, "endOffset": 222, "type": "JOB_TITEL"}, {"value": "mimi.lang@smithcorp.com", "startOffset": 243, "endOffset": 266, "type": "MAIL"}], "2.1.3": [{"value": "+44 (0)1252 392460 Email:", "startOffset": 139, "endOffset": 164, "type": "PHONE"}, {"value": "United Kingdom", "startOffset": 338, "endOffset": 352, "type": "COUNTRY"}], "2.1.6.1": [{"value": "EU", "startOffset": 90, "endOffset": 92, "type": "ORG"}, {"value": "EU", "startOffset": 263, "endOffset": 265, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 1280, "endOffset": 1310, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1373, "endOffset": 1387, "type": "ORG"}], "2.1.6.4": [{"value": "Gidley", "startOffset": 66, "endOffset": 72, "type": "CBI_author"}, {"value": "Dentini", "startOffset": 149, "endOffset": 156, "type": "CBI_author"}, {"value": "Lang", "startOffset": 87, "endOffset": 91, "type": "CBI_author"}, {"value": "Kajiwara", "startOffset": 96, "endOffset": 104, "type": "CBI_author"}, {"value": "Kato", "startOffset": 112, "endOffset": 116, "type": "CBI_author"}, {"value": "Lang", "startOffset": 184, "endOffset": 188, "type": "CBI_author"}], "2.1.9": [{"value": "EFSA", "startOffset": 4101, "endOffset": 4105, "type": "ORG"}, 
{"value": "EFSA ANS Panel", "startOffset": 4161, "endOffset": 4175, "type": "ORG"}, {"value": "Ames", "startOffset": 2392, "endOffset": 2396, "type": "NO_AUTHOR"}], "2.1.10.2": [{"value": "EFSA", "startOffset": 942, "endOffset": 946, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1002, "endOffset": 1016, "type": "ORG"}, {"value": "EFSA", "startOffset": 1101, "endOffset": 1105, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 1161, "endOffset": 1175, "type": "ORG"}, {"value": "EFSA", "startOffset": 1788, "endOffset": 1792, "type": "ORG"}], "2.1.10.3": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "2.1.10.4": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}, {"value": "EFSA", "startOffset": 2618, "endOffset": 2622, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 2625, "endOffset": 2655, "type": "ORG"}, {"value": "European Food Safety Authority", "startOffset": 3893, "endOffset": 3923, "type": "ORG"}], "2.1.10.5": [{"value": "EFSA", "startOffset": 129, "endOffset": 133, "type": "ORG"}, {"value": "EFSA ANS Panel", "startOffset": 189, "endOffset": 203, "type": "ORG"}], "17": [{"value": "2", "startOffset": 18, "endOffset": 19, "type": "CARDINAL"}]}}

View File

@ -1734,6 +1734,88 @@ rule "AI.3.0: Recommend authors from AI as PII"
.forEach(nerEntity -> entityCreationService.optionalByNerEntity(nerEntity, "PII", EntityType.RECOMMENDATION, document));
end
// Rule unit: AI.4
rule "AI.4.0: Add all NER Entities of type Person"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Person"))
    then
        // Recommends each "Person" NER entity as CBI_author. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.4.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Person")
            .filter(person -> person.value().length() > 3 && person.value().length() < 100)
            .forEach(person -> entityCreationService
                .optionalByNerEntityWithConfidence(person, 0.7, "CBI_author", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.4.0", "")));
    end
// Rule unit: AI.5
rule "AI.5.0: Combine and add NER Entities as CBI_address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location") || hasEntitiesOfType("Address") || hasEntitiesOfType("Organization"))
    then
        // Combines NER entities into CBI_address recommendations and calls skip("AI.5.0", "")
        // on every combined entity that is returned.
        // NOTE(review): the guard only tests Location/Address/Organization, while the first type
        // set also lists ORG, STREET and CITY - confirm that this is intended.
        // NOTE(review): the roles of the two type sets and of the trailing 50, 3, 2, 0.7 arguments
        // are not visible in this rule (0.7 is presumably a minimum confidence) - confirm against
        // EntityCreationService.combineNerEntitiesWithConfidence.
        entityCreationService
            .combineNerEntitiesWithConfidence(
                nerEntities, "CBI_address", EntityType.RECOMMENDATION, document,
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "CITY"),
                Set.of("Organization", "Location", "Address", "ORG", "STREET", "POSTAL", "COUNTRY", "CARDINAL", "CITY", "STATE"),
                50, 3, 2, 0.7)
            .forEach(combined -> combined.skip("AI.5.0", ""));
    end
// Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Location"))
    then
        // Recommends each "Location" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.6.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Location")
            .filter(location -> location.value().length() > 3 && location.value().length() < 100)
            .forEach(location -> entityCreationService
                .optionalByNerEntityWithConfidence(location, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.6.0", "")));
    end
// Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address"
    salience 999
    when
        nerEntities: NerEntities(hasEntitiesOfType("Address"))
    then
        // Recommends each "Address" NER entity as CBI_address. Only values whose length is
        // strictly between 3 and 100 characters are passed on; every entity that is returned
        // gets skip("AI.7.0", "") called on it.
        // NOTE(review): 0.7 is presumably the minimum NER confidence - confirm against
        // EntityCreationService.optionalByNerEntityWithConfidence.
        nerEntities.streamEntitiesOfType("Address")
            .filter(address -> address.value().length() > 3 && address.value().length() < 100)
            .forEach(address -> entityCreationService
                .optionalByNerEntityWithConfidence(address, 0.7, "CBI_address", EntityType.RECOMMENDATION, document)
                .ifPresent(created -> created.skip("AI.7.0", "")));
    end
//------------------------------------ Manual redaction rules ------------------------------------
// Rule unit: MAN.0