RED-6009: Document Tree Structure
This commit is contained in:
parent
108da249fa
commit
2a87eede6d
@ -10,7 +10,6 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
@ -83,7 +82,7 @@ public class Document implements GenericSemanticNode {
|
||||
@Override
|
||||
public Headline getHeadline() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseThrow(() -> new NotFoundException("No Headlines found in this document!"));
|
||||
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseGet(Headline::empty);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -68,4 +69,10 @@ public class Headline implements GenericSemanticNode {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public static Headline empty() {
|
||||
|
||||
return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -15,12 +15,14 @@ import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Page {
|
||||
|
||||
@ -104,9 +104,10 @@ public interface SemanticNode {
|
||||
|
||||
/**
|
||||
* Traverses the Tree up, until it hits a Headline or hits a Section which will then return the first Headline from its children.
|
||||
* Throws NotFoundException if no Headline is found this way
|
||||
* If no Headline is found this way, it will recursively traverse the tree up and try again until it hits the root, where it will perform a BFS.
|
||||
* If no Headline exists anywhere in the Document a dummy Headline is returned.
|
||||
*
|
||||
* @return First Headline found
|
||||
* @return First Headline found.
|
||||
*/
|
||||
default Headline getHeadline() {
|
||||
|
||||
@ -115,7 +116,7 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks if its TocId has a length greater than zero.
|
||||
* Checks if its TreeId has a length greater than zero.
|
||||
*
|
||||
* @return boolean indicating whether this Node has a Parent in the DocumentTree
|
||||
*/
|
||||
|
||||
@ -121,6 +121,23 @@ public class Table implements SemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains no entity of any of the provided types.
|
||||
*
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
*/
|
||||
public Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
|
||||
|
||||
return IntStream.range(0, numberOfRows)
|
||||
.boxed()
|
||||
.filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities).flatMap(Collection::stream).noneMatch(entity -> types.contains(entity.getType())))
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a TableCell at the provided row and column location.
|
||||
*
|
||||
|
||||
@ -95,7 +95,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Import(RulesTest.RulesTestConfiguration.class)
|
||||
public class RulesTest {
|
||||
|
||||
private static final String RULES_PATH = "drools/prod_syngenta_new.drl";
|
||||
private static final String RULES_PATH = "drools/rules.drl";
|
||||
private static final String RULES = loadFromClassPath(RULES_PATH);
|
||||
private static final String VERTEBRATE = "vertebrate";
|
||||
private static final String ADDRESS = "CBI_address";
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
|
||||
import static java.util.stream.Collectors.toMap;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@ -40,7 +39,7 @@ import lombok.SneakyThrows;
|
||||
public class MigrationPocTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/rules.drl");
|
||||
|
||||
|
||||
@Autowired
|
||||
private RedactionLogEntryAdapter redactionLogAdapter;
|
||||
@Autowired
|
||||
@ -99,8 +98,6 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
|
||||
|
||||
logPrecision(migratedIds, newIds);
|
||||
logRecall(migratedIds, newIds);
|
||||
|
||||
assertEquals(originalRedactionLog.getRedactionLogEntry().size(), migratedEntities.size());
|
||||
}
|
||||
|
||||
|
||||
@ -110,7 +107,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
|
||||
System.out.printf("precision %.2f\n", precision);
|
||||
System.out.println("New Entries");
|
||||
getAddedEntries(migratedIds, newIds).forEach(System.out::println);
|
||||
assertTrue(precision > 0.9);
|
||||
assertTrue(precision >= 0.85);
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
@ -121,7 +118,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
|
||||
System.out.printf("recall %.2f\n", recall);
|
||||
System.out.println("Missing entries");
|
||||
getMissingEntries(migratedIds, newIds).forEach(System.out::println);
|
||||
assertTrue(recall > 0.9);
|
||||
assertTrue(recall >= 0.85);
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
|
||||
@ -6,17 +6,19 @@ import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.u
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.Collection;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
import java.util.Set
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
|
||||
@ -26,21 +28,26 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary
|
||||
import java.util.stream.Collectors
|
||||
import java.util.Collection
|
||||
import java.util.stream.Stream
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global NerEntitiesAdapter nerEntitiesAdapter
|
||||
global Dictionary dictionary
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
|
||||
query "getFileAttributes"
|
||||
$fileAttribute: FileAttribute()
|
||||
end
|
||||
|
||||
// --------------------------------------- manual redaction rules -------------------------------------------------------------------
|
||||
|
||||
rule "Apply manual resize redaction"
|
||||
|
||||
@ -1,725 +0,0 @@
|
||||
package drools
|
||||
|
||||
import static java.lang.String.format;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.HashSet;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
import java.util.Set
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService;
|
||||
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.Collection;
|
||||
import java.util.stream.Stream;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
global ManualRedactionApplicationService manualRedactionApplicationService
|
||||
global NerEntitiesAdapter nerEntitiesAdapter
|
||||
global Dictionary dictionary
|
||||
|
||||
// --------------------------------------- queries -------------------------------------------------------------------
|
||||
|
||||
query "getFileAttributes"
|
||||
$fileAttribute: FileAttribute()
|
||||
end
|
||||
|
||||
// --------------------------------------- Syngenta specific laboratory recommendation -------------------------------------------------------------------
|
||||
|
||||
rule "0: Recommend CTL/BL laboratory that start with BL or CTL"
|
||||
when
|
||||
$section: Section(containsString("CT") || containsString("BL"))
|
||||
then
|
||||
/* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
|
||||
entityCreationService.byRegexIgnoreCase("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", "CBI_address", EntityType.RECOMMENDATION, $section)
|
||||
.forEach(entity -> {
|
||||
entity.addMatchedRule(0);
|
||||
entity.addEngine(Engine.RULE);
|
||||
insert(entity);
|
||||
});
|
||||
end
|
||||
|
||||
// --------------------------------------- CBI Rules -------------------------------------------------------------------
|
||||
|
||||
rule "1: Redact CBI Authors (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY)
|
||||
then
|
||||
$entity.setRedaction(true);
|
||||
$entity.addMatchedRule(1);
|
||||
$entity.setRedactionReason("Author found");
|
||||
$entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
rule "2: Redact CBI Authors (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY)
|
||||
then
|
||||
$entity.setRedaction(true);
|
||||
$entity.addMatchedRule(2);
|
||||
$entity.setRedactionReason("Author found");
|
||||
$entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
rule "3: Don't redact CBI Address (Non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.ENTITY)
|
||||
then
|
||||
$entity.setRedaction(false);
|
||||
$entity.addMatchedRule(3);
|
||||
$entity.setRedactionReason("Address found for non vertebrate study");
|
||||
end
|
||||
|
||||
rule "4: Redact CBI Address (Vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.ENTITY)
|
||||
then
|
||||
$entity.setRedaction(true);
|
||||
$entity.addMatchedRule(4);
|
||||
$entity.setRedactionReason("Address found");
|
||||
$entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
rule "5: Add FALSE_POSITIVE Entity for genitive CBI_author"
|
||||
when
|
||||
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "['’’'ʼˈ´`‘′ʻ’']s"), redaction)
|
||||
then
|
||||
RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document);
|
||||
falsePositive.addMatchedRule(5);
|
||||
insert(falsePositive);
|
||||
end
|
||||
|
||||
rule "6: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$table: Table(hasHeader("Author(s)"))
|
||||
then
|
||||
$table.streamTableCellsWithHeader("Author(s)")
|
||||
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.setRedaction(true);
|
||||
redactionEntity.addMatchedRule(6);
|
||||
redactionEntity.addEngine(Engine.RULE);
|
||||
redactionEntity.setRedactionReason("Author(s) found");
|
||||
redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
insert(redactionEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "7: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$table: Table(hasHeader("Author(s)"))
|
||||
then
|
||||
$table.streamTableCellsWithHeader("Author(s)")
|
||||
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.setRedaction(true);
|
||||
redactionEntity.addMatchedRule(7);
|
||||
redactionEntity.addEngine(Engine.RULE);
|
||||
redactionEntity.setRedactionReason("Author(s) found");
|
||||
redactionEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
insert(redactionEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "8: Redact all Cell's with Header Author as CBI_author"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$table: Table(hasHeader("Author"))
|
||||
then
|
||||
$table.streamTableCellsWithHeader("Author")
|
||||
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.setRedaction(true);
|
||||
redactionEntity.addMatchedRule(8);
|
||||
redactionEntity.addEngine(Engine.RULE);
|
||||
redactionEntity.setRedactionReason("Author found");
|
||||
redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
insert(redactionEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "9: Redact all Cell's with Header Author as CBI_author"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$table: Table(hasHeader("Author"))
|
||||
then
|
||||
$table.streamTableCellsWithHeader("Author")
|
||||
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.setRedaction(true);
|
||||
redactionEntity.addMatchedRule(9);
|
||||
redactionEntity.addEngine(Engine.RULE);
|
||||
redactionEntity.setRedactionReason("Author found");
|
||||
redactionEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
insert(redactionEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "10: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
salience -1
|
||||
when
|
||||
$table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N"))
|
||||
then
|
||||
$table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity));
|
||||
end
|
||||
|
||||
rule "14: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, $section)
|
||||
.forEach(entity -> {
|
||||
entity.setRedaction(true);
|
||||
entity.setRedactionReason("Author found by \"et al\" regex");
|
||||
entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
entity.addMatchedRule(14);
|
||||
entity.addEngine(Engine.RULE);
|
||||
insert(entity);
|
||||
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
|
||||
});
|
||||
end
|
||||
|
||||
rule "15: Add CBI_author with \"et al.\" Regex (vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(containsString("et al."))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, $section)
|
||||
.forEach(entity -> {
|
||||
entity.setRedaction(true);
|
||||
entity.setRedactionReason("Author found by \"et al\" regex");
|
||||
entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
entity.addMatchedRule(15);
|
||||
entity.addEngine(Engine.RULE);
|
||||
insert(entity);
|
||||
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
|
||||
});
|
||||
end
|
||||
|
||||
rule "16: Add recommendation for Addresses in Test Organism sections"
|
||||
when
|
||||
$section: Section(excludesTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
|
||||
then
|
||||
entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
|
||||
.forEach(entity -> {
|
||||
entity.setRedactionReason("Line after \"Source\" in Test Organism Section");
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.addMatchedRule(16);
|
||||
insert(entity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "17: Add recommendation for Addresses in Test Animals sections"
|
||||
when
|
||||
$section: Section(excludesTables, containsString("Species:"), containsString("Source:"))
|
||||
then
|
||||
entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
|
||||
.forEach(entity -> {
|
||||
entity.setRedactionReason("Line after \"Source:\" in Test Animals Section");
|
||||
entity.addEngine(Engine.RULE);
|
||||
entity.addMatchedRule(17);
|
||||
insert(entity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "18.0: Do not redact Names and Addresses if published information found in section without tables"
|
||||
when
|
||||
$section: Paragraph(hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.setRedaction(false);
|
||||
redactionEntity.setRedactionReason("Published Information found");
|
||||
redactionEntity.addReferences($section.getEntitiesOfType("published_information"));
|
||||
});
|
||||
end
|
||||
|
||||
rule "18.1: Do not redact Names and Addresses if published information found in same table row"
|
||||
when
|
||||
$table: Table(hasEntitiesOfType("published_information"),
|
||||
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
|
||||
then
|
||||
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address"))
|
||||
.forEach(redactionEntity -> {
|
||||
redactionEntity.setRedaction(false);
|
||||
redactionEntity.setRedactionReason("Published Information found in row");
|
||||
redactionEntity.addReferences($table.getEntitiesOfTypeInSameRow("published_information", redactionEntity));
|
||||
});
|
||||
end
|
||||
|
||||
// --------------------------------------- PII rules -------------------------------------------------------------------
|
||||
|
||||
rule "19: Redact all PII (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", redaction == false)
|
||||
then
|
||||
$pii.setRedaction(true);
|
||||
$pii.setRedactionReason("Personal Information found");
|
||||
$pii.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
$pii.addMatchedRule(19);
|
||||
end
|
||||
|
||||
rule "20: Redact all PII (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$pii: RedactionEntity(type == "PII", redaction == false)
|
||||
then
|
||||
$pii.setRedaction(true);
|
||||
$pii.setRedactionReason("Personal Information found");
|
||||
$pii.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
$pii.addMatchedRule(20);
|
||||
end
|
||||
|
||||
rule "21: Redact Emails by RegEx (Non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(containsString("@"))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
|
||||
.forEach(emailEntity -> {
|
||||
emailEntity.setRedaction(true);
|
||||
emailEntity.addEngine(Engine.RULE);
|
||||
emailEntity.setRedactionReason("Found by Email Regex");
|
||||
emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
emailEntity.addMatchedRule(21);
|
||||
insert(emailEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "22: Redact Emails by RegEx (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(containsString("@"))
|
||||
then
|
||||
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
|
||||
.forEach(emailEntity -> {
|
||||
emailEntity.setRedaction(true);
|
||||
emailEntity.addEngine(Engine.RULE);
|
||||
emailEntity.setRedactionReason("Found by Email Regex");
|
||||
emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
emailEntity.addMatchedRule(22);
|
||||
insert(emailEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "25: Redact Phone and Fax by RegEx (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(containsString("Contact") ||
|
||||
containsString("Telephone") ||
|
||||
containsString("Phone") ||
|
||||
containsString("Ph.") ||
|
||||
containsString("Fax") ||
|
||||
containsString("Tel") ||
|
||||
containsString("Ter") ||
|
||||
containsString("Mobile") ||
|
||||
containsString("Fel") ||
|
||||
containsString("Fer"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section)
|
||||
.forEach(contactEntity -> {
|
||||
contactEntity.setRedaction(true);
|
||||
contactEntity.addEngine(Engine.RULE);
|
||||
contactEntity.setRedactionReason("Found by Email Regex");
|
||||
contactEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
contactEntity.addMatchedRule(25);
|
||||
insert(contactEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "26: Redact Phone and Fax by RegEx (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(containsString("Contact") ||
|
||||
containsString("Telephone") ||
|
||||
containsString("Phone") ||
|
||||
containsString("Ph.") ||
|
||||
containsString("Fax") ||
|
||||
containsString("Tel") ||
|
||||
containsString("Ter") ||
|
||||
containsString("Mobile") ||
|
||||
containsString("Fel") ||
|
||||
containsString("Fer"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section)
|
||||
.forEach(contactEntity -> {
|
||||
contactEntity.setRedaction(true);
|
||||
contactEntity.addEngine(Engine.RULE);
|
||||
contactEntity.setRedactionReason("Found by Email Regex");
|
||||
contactEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
contactEntity.addMatchedRule(26);
|
||||
insert(contactEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
rule "27: Redact AUTHOR(S) (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
authorEntity.setRedaction(true);
|
||||
authorEntity.addMatchedRule(27);
|
||||
authorEntity.addEngine(Engine.RULE);
|
||||
authorEntity.setRedactionReason("AUTHOR(S) was found");
|
||||
authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
insert(authorEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "28: Redact AUTHOR(S) (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
authorEntity.setRedaction(true);
|
||||
authorEntity.addMatchedRule(28);
|
||||
authorEntity.addEngine(Engine.RULE);
|
||||
authorEntity.setRedactionReason("AUTHOR(S) was found");
|
||||
authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
insert(authorEntity);
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
rule "29: Redact AUTHOR(S) (non vertebrate study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
authorEntity.setRedaction(true);
|
||||
authorEntity.addMatchedRule(29);
|
||||
authorEntity.addEngine(Engine.RULE);
|
||||
authorEntity.setRedactionReason("AUTHOR(S) was found");
|
||||
authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
insert(authorEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "30: Redact AUTHOR(S) (vertebrate study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
authorEntity.setRedaction(true);
|
||||
authorEntity.addMatchedRule(30);
|
||||
authorEntity.addEngine(Engine.RULE);
|
||||
authorEntity.setRedactionReason("AUTHOR(S) was found");
|
||||
authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
insert(authorEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value == "Yes")
|
||||
$section: Section(excludesTables, containsString("PERFORMING LABORATORY:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
|
||||
.forEach(laboratoryEntity -> {
|
||||
laboratoryEntity.setRedaction(false);
|
||||
laboratoryEntity.addMatchedRule(31);
|
||||
laboratoryEntity.addEngine(Engine.RULE);
|
||||
laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found for non vertebrate study");
|
||||
dictionary.addLocalDictionaryEntry(laboratoryEntity);
|
||||
insert(laboratoryEntity);
|
||||
});
|
||||
end
|
||||
|
||||
rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value == "Yes")
|
||||
$section: Section(excludesTables, containsString("PERFORMING LABORATORY:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
|
||||
.forEach(laboratoryEntity -> {
|
||||
laboratoryEntity.setRedaction(true);
|
||||
laboratoryEntity.addMatchedRule(32);
|
||||
laboratoryEntity.addEngine(Engine.RULE);
|
||||
laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found");
|
||||
laboratoryEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
dictionary.addLocalDictionaryEntry(laboratoryEntity);
|
||||
insert(laboratoryEntity);
|
||||
});
|
||||
end
|
||||
|
||||
// --------------------------------------- other rules -------------------------------------------------------------------
|
||||
|
||||
rule "33: Purity Hint"
|
||||
when
|
||||
$section: Section(containsStringIgnoreCase("purity"))
|
||||
then
|
||||
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.ENTITY, 1, $section)
|
||||
.forEach(hint -> {
|
||||
hint.addEngine(Engine.RULE);
|
||||
hint.addMatchedRule(33);
|
||||
});
|
||||
end
|
||||
|
||||
rule "34: Redact signatures (not Vertebrate Study)"
|
||||
when
|
||||
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$signature: Image(imageType == ImageType.SIGNATURE)
|
||||
then
|
||||
$signature.setRedaction(true);
|
||||
$signature.setMatchedRule(34);
|
||||
$signature.setRedactionReason("Signature Found");
|
||||
$signature.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
rule "35: Redact signatures (Vertebrate Study)"
|
||||
when
|
||||
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
|
||||
$signature: Image(imageType == ImageType.SIGNATURE)
|
||||
then
|
||||
$signature.setRedaction(true);
|
||||
$signature.setMatchedRule(35);
|
||||
$signature.setRedactionReason("Signature Found");
|
||||
$signature.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
|
||||
rule "36: Redact logos"
|
||||
when
|
||||
not FileAttribute(label == "Vertbrate Study", value.toLowerCase() == "yes")
|
||||
$logo: Image(imageType == ImageType.LOGO)
|
||||
then
|
||||
$logo.setRedaction(true);
|
||||
$logo.setMatchedRule(36);
|
||||
$logo.setRedactionReason("Logo Found");
|
||||
$logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
|
||||
end
|
||||
|
||||
// --------------------------------------- NER Entities rules -------------------------------------------------------------------
|
||||
|
||||
rule "add NER Entities of type CBI_author"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("CBI_author"))
|
||||
then
|
||||
nerEntities.streamEntitiesOfType("CBI_author")
|
||||
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document))
|
||||
.forEach(entity -> insert(entity));
|
||||
end
|
||||
|
||||
rule "combine and add NER Entities as CBI_address"
|
||||
salience 999
|
||||
when
|
||||
nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY"))
|
||||
then
|
||||
nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
.map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document))
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.NER);
|
||||
insert(entity);
|
||||
});
|
||||
end
|
||||
|
||||
// --------------------------------------- manual redaction rules -------------------------------------------------------------------
|
||||
|
||||
rule "Apply manual resize redaction"
|
||||
salience 128
|
||||
when
|
||||
$resizeRedaction: ManualResizeRedaction($id: annotationId)
|
||||
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
|
||||
retract($resizeRedaction);
|
||||
update($entityToBeResized);
|
||||
end
|
||||
|
||||
rule "Apply id removals that are valid and not in forced redactions to Entity"
|
||||
salience 128
|
||||
when
|
||||
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToBeRemoved.removeFromGraph();
|
||||
retract($entityToBeRemoved);
|
||||
end
|
||||
|
||||
rule "Apply id removals that are valid and not in forced redactions to Image"
|
||||
salience 128
|
||||
when
|
||||
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
|
||||
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
|
||||
$entityToBeRemoved: Image($id == id)
|
||||
then
|
||||
$entityToBeRemoved.setIgnored(true);
|
||||
retract($entityToBeRemoved);
|
||||
end
|
||||
|
||||
rule "Apply force redaction"
|
||||
salience 128
|
||||
when
|
||||
$forceRedaction: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
|
||||
$entityToForce: RedactionEntity(matchesAnnotationId($id))
|
||||
then
|
||||
$entityToForce.setLegalBasis($legalBasis);
|
||||
$entityToForce.setRedaction(true);
|
||||
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
|
||||
retract($forceRedaction);
|
||||
update($entityToForce);
|
||||
end
|
||||
|
||||
rule "Apply image recategorization"
|
||||
salience 128
|
||||
when
|
||||
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$image: Image($id == id)
|
||||
then
|
||||
$image.setImageType(ImageType.fromString($imageType));
|
||||
end
|
||||
|
||||
// --------------------------------------- merging rules -------------------------------------------------------------------
|
||||
|
||||
rule "remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
$larger: RedactionEntity($type: type, $entityType: entityType)
|
||||
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$contained.removeFromGraph();
|
||||
retract($contained);
|
||||
end
|
||||
|
||||
rule "merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$first.removeFromGraph();
|
||||
$second.removeFromGraph();
|
||||
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
|
||||
retract($first);
|
||||
retract($second);
|
||||
insert(mergedEntity);
|
||||
end
|
||||
|
||||
rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE"
|
||||
salience 64
|
||||
when
|
||||
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
|
||||
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$entity.removeFromGraph();
|
||||
retract($entity)
|
||||
end
|
||||
|
||||
rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
|
||||
salience 64
|
||||
when
|
||||
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
|
||||
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
rule "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY)
|
||||
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$entity.addEngines($recommendation.getEngines());
|
||||
$recommendation.removeFromGraph();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
rule "remove Entity of type RECOMMENDATION when contained by ENTITY"
|
||||
salience 256
|
||||
when
|
||||
$entity: RedactionEntity(entityType == EntityType.ENTITY)
|
||||
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
|
||||
then
|
||||
$recommendation.removeFromGraph();
|
||||
retract($recommendation);
|
||||
end
|
||||
|
||||
rule "remove Entity of lower rank, when equal boundaries and entityType"
|
||||
salience 32
|
||||
when
|
||||
$higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary)
|
||||
$lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !redaction)
|
||||
then
|
||||
$lowerRank.removeFromGraph();
|
||||
retract($lowerRank);
|
||||
end
|
||||
|
||||
// --------------------------------------- FileAttribute Rules -------------------------------------------------------------------
|
||||
|
||||
rule "remove duplicate FileAttributes"
|
||||
salience 64
|
||||
when
|
||||
$fileAttribute: FileAttribute($label: label, $value: value)
|
||||
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
|
||||
then
|
||||
retract($duplicate);
|
||||
end
|
||||
|
||||
// --------------------------------------- local dictionary search -------------------------------------------------------------------
|
||||
|
||||
rule "run local dictionary search"
|
||||
agenda-group "LOCAL_DICTIONARY_ADDS"
|
||||
salience -999
|
||||
when
|
||||
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
|
||||
then
|
||||
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
|
||||
.forEach(entity -> {
|
||||
entity.addEngine(Engine.RULE);
|
||||
insert(entity);
|
||||
});
|
||||
end
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -3,7 +3,6 @@ package drools
|
||||
import static java.lang.String.format;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.mapper.PropertiesMapper.parseImageType;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
@ -245,7 +244,7 @@ rule "10: Redact row if row contains \"determination of residues\" and livestock
|
||||
|
||||
rule "11: Redact if CTL/* or BL/* was found"
|
||||
when
|
||||
$section: Section(excludesTables, (containsString("CTL/") || containsString("BL/")))
|
||||
$section: Section(!hasTables, (containsString("CTL/") || containsString("BL/")))
|
||||
then
|
||||
entityCreationService.byString("CTL/", "must_redact", EntityType.ENTITY, $section)
|
||||
.forEach(mustRedactEntity -> insert(mustRedactEntity));
|
||||
@ -279,7 +278,7 @@ rule "12: Add CBI_author with \"et al.\" Regex"
|
||||
|
||||
rule "13: Add recommendation for Addresses in Test Organism sections"
|
||||
when
|
||||
$section: Section(excludesTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
|
||||
$section: Section(!hasTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
|
||||
then
|
||||
entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
|
||||
.forEach(redactionEntity -> {
|
||||
@ -292,7 +291,7 @@ rule "13: Add recommendation for Addresses in Test Organism sections"
|
||||
rule "14: Add recommendation for Addresses in Test Animals sections"
|
||||
|
||||
when
|
||||
$section: Section(excludesTables, containsString("Species:"), containsString("Source:"))
|
||||
$section: Section(!hasTables, containsString("Species:"), containsString("Source:"))
|
||||
then
|
||||
entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
|
||||
.forEach(redactionEntity -> {
|
||||
@ -386,7 +385,7 @@ rule "18: redact line between contact keywords"
|
||||
rule "19: Redact AUTHOR(S)"
|
||||
when
|
||||
FileAttribute(placeholder == "{fileattributes.vertebrateStudy}", value == "true")
|
||||
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"))
|
||||
$section: Section(!hasTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
@ -400,7 +399,7 @@ rule "19: Redact AUTHOR(S)"
|
||||
|
||||
rule "20: Redact PERFORMING LABORATORY"
|
||||
when
|
||||
$section: Section(excludesTables, containsString("PERFORMING LABORATORY:"))
|
||||
$section: Section(!hasTables, containsString("PERFORMING LABORATORY:"))
|
||||
then
|
||||
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
@ -414,7 +413,7 @@ rule "20: Redact PERFORMING LABORATORY"
|
||||
|
||||
rule "21: Redact On behalf of Sequani Ltd.:"
|
||||
when
|
||||
$section: Section(excludesTables, containsString("On behalf of Sequani Ltd.: Name Title"))
|
||||
$section: Section(!hasTables, containsString("On behalf of Sequani Ltd.: Name Title"))
|
||||
then
|
||||
entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
@ -428,7 +427,7 @@ rule "21: Redact On behalf of Sequani Ltd.:"
|
||||
|
||||
rule "22: Redact On behalf of Syngenta Ltd.:"
|
||||
when
|
||||
$section: Section(excludesTables, containsString("On behalf of Syngenta Ltd.: Name Title"))
|
||||
$section: Section(!hasTables, containsString("On behalf of Syngenta Ltd.: Name Title"))
|
||||
then
|
||||
entityCreationService.betweenStrings("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", EntityType.ENTITY, $section)
|
||||
.forEach(authorEntity -> {
|
||||
@ -506,7 +505,7 @@ rule "101: Redact CAS Number"
|
||||
|
||||
rule "102: Guidelines FileAttributes"
|
||||
when
|
||||
$section: Section(excludesTables, (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS")))
|
||||
$section: Section(!hasTables, (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS")))
|
||||
then
|
||||
RedactionSearchUtility.findBoundariesByRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream()
|
||||
.map(boundary -> $section.getTextBlock().subSequence(boundary).toString())
|
||||
@ -590,7 +589,7 @@ rule "Apply image recategorization"
|
||||
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
|
||||
$image: Image($id == id)
|
||||
then
|
||||
$image.setImageType(parseImageType($imageType));
|
||||
$image.setImageType(ImageType.fromString($imageType));
|
||||
end
|
||||
|
||||
// --------------------------------------- merging rules -------------------------------------------------------------------
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user