RED-6009: Document Tree Structure

This commit is contained in:
Kilian Schüttler 2023-06-15 21:07:47 +02:00
parent 108da249fa
commit 2a87eede6d
12 changed files with 934 additions and 1114 deletions

View File

@ -10,7 +10,6 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
@ -83,7 +82,7 @@ public class Document implements GenericSemanticNode {
@Override
public Headline getHeadline() {
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseThrow(() -> new NotFoundException("No Headlines found in this document!"));
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElseGet(Headline::empty);
}

View File

@ -6,6 +6,7 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import lombok.AccessLevel;
@ -68,4 +69,10 @@ public class Headline implements GenericSemanticNode {
return this;
}
public static Headline empty() {
return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build();
}
}

View File

@ -15,12 +15,14 @@ import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.FieldDefaults;
@Getter
@Setter
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Page {

View File

@ -104,9 +104,10 @@ public interface SemanticNode {
/**
* Traverses the Tree up, until it hits a Headline or hits a Section which will then return the first Headline from its children.
* Throws NotFoundException if no Headline is found this way
* If no Headline is found this way, it will recursively traverse the tree up and try again until it hits the root, where it will perform a BFS.
* If no Headline exists anywhere in the Document a dummy Headline is returned.
*
* @return First Headline found
* @return First Headline found.
*/
default Headline getHeadline() {
@ -115,7 +116,7 @@ public interface SemanticNode {
/**
* Checks if its TocId has a length greater than zero.
* Checks if its TreeId has a length greater than zero.
*
* @return boolean indicating whether this Node has a Parent in the DocumentTree
*/

View File

@ -121,6 +121,23 @@ public class Table implements SemanticNode {
}
/**
* Streams all entities in this table, that appear in a row, which contains no entity of any of the provided types.
*
* @param types type strings to check whether a row contains an entity like them
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
*/
public Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
return IntStream.range(0, numberOfRows)
.boxed()
.filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities).flatMap(Collection::stream).noneMatch(entity -> types.contains(entity.getType())))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.flatMap(Collection::stream);
}
/**
* Returns a TableCell at the provided row and column location.
*

View File

@ -95,7 +95,7 @@ import lombok.extern.slf4j.Slf4j;
@Import(RulesTest.RulesTestConfiguration.class)
public class RulesTest {
private static final String RULES_PATH = "drools/prod_syngenta_new.drl";
private static final String RULES_PATH = "drools/rules.drl";
private static final String RULES = loadFromClassPath(RULES_PATH);
private static final String VERTEBRATE = "vertebrate";
private static final String ADDRESS = "CBI_address";

View File

@ -1,7 +1,6 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
import static java.util.stream.Collectors.toMap;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.when;
@ -40,7 +39,7 @@ import lombok.SneakyThrows;
public class MigrationPocTest extends BuildDocumentIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules.drl");
@Autowired
private RedactionLogEntryAdapter redactionLogAdapter;
@Autowired
@ -99,8 +98,6 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
logPrecision(migratedIds, newIds);
logRecall(migratedIds, newIds);
assertEquals(originalRedactionLog.getRedactionLogEntry().size(), migratedEntities.size());
}
@ -110,7 +107,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
System.out.printf("precision %.2f\n", precision);
System.out.println("New Entries");
getAddedEntries(migratedIds, newIds).forEach(System.out::println);
assertTrue(precision > 0.9);
assertTrue(precision >= 0.85);
System.out.println();
}
@ -121,7 +118,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
System.out.printf("recall %.2f\n", recall);
System.out.println("Missing entries");
getMissingEntries(migratedIds, newIds).forEach(System.out::println);
assertTrue(recall > 0.9);
assertTrue(recall >= 0.85);
System.out.println();
}

View File

@ -6,17 +6,19 @@ import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.u
import java.util.List;
import java.util.LinkedList;
import java.util.HashSet;
import java.util.Set
import java.util.stream.Collectors;
import java.util.Collection;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import java.util.Set
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
@ -26,21 +28,26 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary
import java.util.stream.Collectors
import java.util.Collection
import java.util.stream.Stream
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global NerEntitiesAdapter nerEntitiesAdapter
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
query "getFileAttributes"
$fileAttribute: FileAttribute()
end
// --------------------------------------- manual redaction rules -------------------------------------------------------------------
rule "Apply manual resize redaction"

View File

@ -1,725 +0,0 @@
package drools
import static java.lang.String.format;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch;
import java.util.List;
import java.util.LinkedList;
import java.util.HashSet;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import java.util.Set
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import java.util.stream.Collectors;
import java.util.Collection;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
global Document document
global EntityCreationService entityCreationService
global ManualRedactionApplicationService manualRedactionApplicationService
global NerEntitiesAdapter nerEntitiesAdapter
global Dictionary dictionary
// --------------------------------------- queries -------------------------------------------------------------------
query "getFileAttributes"
$fileAttribute: FileAttribute()
end
// --------------------------------------- Syngenta specific laboratory recommendation -------------------------------------------------------------------
rule "0: Recommend CTL/BL laboratory that start with BL or CTL"
when
$section: Section(containsString("CT") || containsString("BL"))
then
/* Regular expression: ((\b((([Cc]T(([1ILli\/])| L|~P))|(BL))[\. ]?([\dA-Ziltphz~\/.:!]| ?[\(',][Ppi](\(e)?|([\(-?']\/))+( ?[\(\/\dA-Znasieg]+)?)\b( ?\/? ?\d+)?)|(\bCT[L1i]\b)) */
entityCreationService.byRegexIgnoreCase("((\\b((([Cc]T(([1ILli\\/])| L|~P))|(BL))[\\. ]?([\\dA-Ziltphz~\\/.:!]| ?[\\(',][Ppi](\\(e)?|([\\(-?']\\/))+( ?[\\(\\/\\dA-Znasieg]+)?)\\b( ?\\/? ?\\d+)?)|(\\bCT[L1i]\\b))", "CBI_address", EntityType.RECOMMENDATION, $section)
.forEach(entity -> {
entity.addMatchedRule(0);
entity.addEngine(Engine.RULE);
insert(entity);
});
end
// --------------------------------------- CBI Rules -------------------------------------------------------------------
rule "1: Redact CBI Authors (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY)
then
$entity.setRedaction(true);
$entity.addMatchedRule(1);
$entity.setRedactionReason("Author found");
$entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "2: Redact CBI Authors (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_author", entityType == EntityType.ENTITY)
then
$entity.setRedaction(true);
$entity.addMatchedRule(2);
$entity.setRedactionReason("Author found");
$entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "3: Don't redact CBI Address (Non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.ENTITY)
then
$entity.setRedaction(false);
$entity.addMatchedRule(3);
$entity.setRedactionReason("Address found for non vertebrate study");
end
rule "4: Redact CBI Address (Vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$entity: RedactionEntity(type == "CBI_address", entityType == EntityType.ENTITY)
then
$entity.setRedaction(true);
$entity.addMatchedRule(4);
$entity.setRedactionReason("Address found");
$entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "5: Add FALSE_POSITIVE Entity for genitive CBI_author"
when
$entity: RedactionEntity(type == "CBI_author", anyMatch(textAfter, "[''ʼˈ´`ʻ']s"), redaction)
then
RedactionEntity falsePositive = entityCreationService.byBoundary($entity.getBoundary(), "CBI_author", EntityType.FALSE_POSITIVE, document);
falsePositive.addMatchedRule(5);
insert(falsePositive);
end
rule "6: Redact all Cell's with Header Author(s) as CBI_author (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$table: Table(hasHeader("Author(s)"))
then
$table.streamTableCellsWithHeader("Author(s)")
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule(6);
redactionEntity.addEngine(Engine.RULE);
redactionEntity.setRedactionReason("Author(s) found");
redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(redactionEntity);
});
end
rule "7: Redact all Cell's with Header Author(s) as CBI_author (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$table: Table(hasHeader("Author(s)"))
then
$table.streamTableCellsWithHeader("Author(s)")
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule(7);
redactionEntity.addEngine(Engine.RULE);
redactionEntity.setRedactionReason("Author(s) found");
redactionEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
insert(redactionEntity);
});
end
rule "8: Redact all Cell's with Header Author as CBI_author"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$table: Table(hasHeader("Author"))
then
$table.streamTableCellsWithHeader("Author")
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule(8);
redactionEntity.addEngine(Engine.RULE);
redactionEntity.setRedactionReason("Author found");
redactionEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(redactionEntity);
});
end
rule "9: Redact all Cell's with Header Author as CBI_author"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$table: Table(hasHeader("Author"))
then
$table.streamTableCellsWithHeader("Author")
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "CBI_author", EntityType.ENTITY))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule(9);
redactionEntity.addEngine(Engine.RULE);
redactionEntity.setRedactionReason("Author found");
redactionEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
insert(redactionEntity);
});
end
rule "10: Recommend all CBI_author entities in Table with Vertebrate Study Y/N Header"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -1
when
$table: Table(hasHeader("Author(s)") && hasHeader("Vertebrate Study Y/N"))
then
$table.getEntitiesOfType("CBI_author").forEach(entity -> dictionary.addMultipleAuthorsAsRecommendation(entity));
end
rule "14: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, $section)
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Author found by \"et al\" regex");
entity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
entity.addMatchedRule(14);
entity.addEngine(Engine.RULE);
insert(entity);
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
});
end
rule "15: Add CBI_author with \"et al.\" Regex (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, $section)
.forEach(entity -> {
entity.setRedaction(true);
entity.setRedactionReason("Author found by \"et al\" regex");
entity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
entity.addMatchedRule(15);
entity.addEngine(Engine.RULE);
insert(entity);
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
});
end
rule "16: Add recommendation for Addresses in Test Organism sections"
when
$section: Section(excludesTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
then
entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
.forEach(entity -> {
entity.setRedactionReason("Line after \"Source\" in Test Organism Section");
entity.addEngine(Engine.RULE);
entity.addMatchedRule(16);
insert(entity);
});
end
rule "17: Add recommendation for Addresses in Test Animals sections"
when
$section: Section(excludesTables, containsString("Species:"), containsString("Source:"))
then
entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
.forEach(entity -> {
entity.setRedactionReason("Line after \"Source:\" in Test Animals Section");
entity.addEngine(Engine.RULE);
entity.addMatchedRule(17);
insert(entity);
});
end
rule "18.0: Do not redact Names and Addresses if published information found in section without tables"
when
$section: Paragraph(hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(false);
redactionEntity.setRedactionReason("Published Information found");
redactionEntity.addReferences($section.getEntitiesOfType("published_information"));
});
end
rule "18.1: Do not redact Names and Addresses if published information found in same table row"
when
$table: Table(hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(redactionEntity -> {
redactionEntity.setRedaction(false);
redactionEntity.setRedactionReason("Published Information found in row");
redactionEntity.addReferences($table.getEntitiesOfTypeInSameRow("published_information", redactionEntity));
});
end
// --------------------------------------- PII rules -------------------------------------------------------------------
rule "19: Redact all PII (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", redaction == false)
then
$pii.setRedaction(true);
$pii.setRedactionReason("Personal Information found");
$pii.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
$pii.addMatchedRule(19);
end
rule "20: Redact all PII (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$pii: RedactionEntity(type == "PII", redaction == false)
then
$pii.setRedaction(true);
$pii.setRedactionReason("Personal Information found");
$pii.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
$pii.addMatchedRule(20);
end
rule "21: Redact Emails by RegEx (Non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(containsString("@"))
then
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
.forEach(emailEntity -> {
emailEntity.setRedaction(true);
emailEntity.addEngine(Engine.RULE);
emailEntity.setRedactionReason("Found by Email Regex");
emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
emailEntity.addMatchedRule(21);
insert(emailEntity);
});
end
rule "22: Redact Emails by RegEx (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(containsString("@"))
then
entityCreationService.byRegex("\\b([A-Za-z0-9._%+\\-]+@[A-Za-z0-9.\\-]+\\.[A-Za-z\\-]{1,23}[A-Za-z])\\b", "PII", EntityType.ENTITY, 1, $section)
.forEach(emailEntity -> {
emailEntity.setRedaction(true);
emailEntity.addEngine(Engine.RULE);
emailEntity.setRedactionReason("Found by Email Regex");
emailEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
emailEntity.addMatchedRule(22);
insert(emailEntity);
});
end
rule "25: Redact Phone and Fax by RegEx (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(containsString("Contact") ||
containsString("Telephone") ||
containsString("Phone") ||
containsString("Ph.") ||
containsString("Fax") ||
containsString("Tel") ||
containsString("Ter") ||
containsString("Mobile") ||
containsString("Fel") ||
containsString("Fer"))
then
entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section)
.forEach(contactEntity -> {
contactEntity.setRedaction(true);
contactEntity.addEngine(Engine.RULE);
contactEntity.setRedactionReason("Found by Email Regex");
contactEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
contactEntity.addMatchedRule(25);
insert(contactEntity);
});
end
rule "26: Redact Phone and Fax by RegEx (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(containsString("Contact") ||
containsString("Telephone") ||
containsString("Phone") ||
containsString("Ph.") ||
containsString("Fax") ||
containsString("Tel") ||
containsString("Ter") ||
containsString("Mobile") ||
containsString("Fel") ||
containsString("Fer"))
then
entityCreationService.byRegexIgnoreCase("\\b(contact|telephone|phone|ph\\.|fax|tel|ter|mobile|fel|fer)[a-zA-Z\\s]{0,10}[:.\\s]{0,3}([\\+\\d\\(][\\s\\d\\(\\)\\-\\/\\.]{4,100}\\d)\\b", "PII", EntityType.ENTITY, 2, $section)
.forEach(contactEntity -> {
contactEntity.setRedaction(true);
contactEntity.addEngine(Engine.RULE);
contactEntity.setRedactionReason("Found by Email Regex");
contactEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
contactEntity.addMatchedRule(26);
insert(contactEntity);
});
end
rule "27: Redact AUTHOR(S) (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:"))
then
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule(27);
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
rule "28: Redact AUTHOR(S) (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"), !containsString("STUDY COMPLETION DATE:"))
then
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule(28);
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
rule "29: Redact AUTHOR(S) (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:"))
then
entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule(29);
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
rule "30: Redact AUTHOR(S) (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("STUDY COMPLETION DATE:"))
then
entityCreationService.betweenStrings("AUTHOR(S):", "STUDY COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
authorEntity.setRedaction(true);
authorEntity.addMatchedRule(30);
authorEntity.addEngine(Engine.RULE);
authorEntity.setRedactionReason("AUTHOR(S) was found");
authorEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
insert(authorEntity);
});
end
rule "31: Redact PERFORMING LABORATORY (Non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
not FileAttribute(label == "Vertebrate Study", value == "Yes")
$section: Section(excludesTables, containsString("PERFORMING LABORATORY:"))
then
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.setRedaction(false);
laboratoryEntity.addMatchedRule(31);
laboratoryEntity.addEngine(Engine.RULE);
laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found for non vertebrate study");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
insert(laboratoryEntity);
});
end
rule "32: Redact PERFORMING LABORATORY (Vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
FileAttribute(label == "Vertebrate Study", value == "Yes")
$section: Section(excludesTables, containsString("PERFORMING LABORATORY:"))
then
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.setRedaction(true);
laboratoryEntity.addMatchedRule(32);
laboratoryEntity.addEngine(Engine.RULE);
laboratoryEntity.setRedactionReason("PERFORMING LABORATORY was found");
laboratoryEntity.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
insert(laboratoryEntity);
});
end
// --------------------------------------- other rules -------------------------------------------------------------------
rule "33: Purity Hint"
when
$section: Section(containsStringIgnoreCase("purity"))
then
entityCreationService.byRegexIgnoreCase("(purity ?( of|\\(.{1,20}\\))?( ?:)?) .{0,5}[\\d\\.]+( .{0,4}\\.)? ?%", "hint_only", EntityType.ENTITY, 1, $section)
.forEach(hint -> {
hint.addEngine(Engine.RULE);
hint.addMatchedRule(33);
});
end
rule "34: Redact signatures (not Vertebrate Study)"
when
not FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$signature: Image(imageType == ImageType.SIGNATURE)
then
$signature.setRedaction(true);
$signature.setMatchedRule(34);
$signature.setRedactionReason("Signature Found");
$signature.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "35: Redact signatures (Vertebrate Study)"
when
FileAttribute(label == "Vertebrate Study", value.toLowerCase() == "yes")
$signature: Image(imageType == ImageType.SIGNATURE)
then
$signature.setRedaction(true);
$signature.setMatchedRule(35);
$signature.setRedactionReason("Signature Found");
$signature.setLegalBasis("Article 39(e)(2) of Regulation (EC) No 178/2002");
end
rule "36: Redact logos"
when
not FileAttribute(label == "Vertbrate Study", value.toLowerCase() == "yes")
$logo: Image(imageType == ImageType.LOGO)
then
$logo.setRedaction(true);
$logo.setMatchedRule(36);
$logo.setRedactionReason("Logo Found");
$logo.setLegalBasis("Article 39(e)(3) of Regulation (EC) No 178/2002");
end
// --------------------------------------- NER Entities rules -------------------------------------------------------------------
rule "add NER Entities of type CBI_author"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("CBI_author"))
then
nerEntities.streamEntitiesOfType("CBI_author")
.map(nerEntity -> entityCreationService.byNerEntity(nerEntity, EntityType.RECOMMENDATION, document))
.forEach(entity -> insert(entity));
end
rule "combine and add NER Entities as CBI_address"
salience 999
when
nerEntities: NerEntities(hasEntitiesOfType("ORG") || hasEntitiesOfType("STREET") || hasEntitiesOfType("CITY"))
then
nerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
.map(boundary -> entityCreationService.byBoundary(boundary, "CBI_address", EntityType.RECOMMENDATION, document))
.forEach(entity -> {
entity.addEngine(Engine.NER);
insert(entity);
});
end
// --------------------------------------- manual redaction rules -------------------------------------------------------------------
rule "Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId)
$entityToBeResized: RedactionEntity(matchesAnnotationId($id))
then
manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
end
rule "Apply id removals that are valid and not in forced redactions to Entity"
salience 128
when
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id))
then
$entityToBeRemoved.removeFromGraph();
retract($entityToBeRemoved);
end
rule "Apply id removals that are valid and not in forced redactions to Image"
salience 128
when
IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId)
not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
$entityToBeRemoved: Image($id == id)
then
$entityToBeRemoved.setIgnored(true);
retract($entityToBeRemoved);
end
rule "Apply force redaction"
salience 128
when
$forceRedaction: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis)
$entityToForce: RedactionEntity(matchesAnnotationId($id))
then
$entityToForce.setLegalBasis($legalBasis);
$entityToForce.setRedaction(true);
$entityToForce.setSkipRemoveEntitiesContainedInLarger(true);
retract($forceRedaction);
update($entityToForce);
end
rule "Apply image recategorization"
salience 128
when
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$image: Image($id == id)
then
$image.setImageType(ImageType.fromString($imageType));
end
// --------------------------------------- merging rules -------------------------------------------------------------------
rule "remove Entity contained by Entity of same type"
salience 65
when
$larger: RedactionEntity($type: type, $entityType: entityType)
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger)
then
$contained.removeFromGraph();
retract($contained);
end
rule "merge intersecting Entities of same type"
salience 64
when
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger)
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger)
then
$first.removeFromGraph();
$second.removeFromGraph();
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
retract($first);
retract($second);
insert(mergedEntity);
end
rule "remove Entity of type ENTITY when contained by FALSE_POSITIVE"
salience 64
when
$falsePositive: RedactionEntity($type: type, entityType == EntityType.FALSE_POSITIVE)
$entity: RedactionEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized, !skipRemoveEntitiesContainedInLarger)
then
$entity.removeFromGraph();
retract($entity)
end
rule "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
salience 64
when
$falseRecommendation: RedactionEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION)
$recommendation: RedactionEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
retract($recommendation);
end
rule "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
salience 256
when
$entity: RedactionEntity($type: type, entityType == EntityType.ENTITY)
$recommendation: RedactionEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$entity.addEngines($recommendation.getEngines());
$recommendation.removeFromGraph();
retract($recommendation);
end
rule "remove Entity of type RECOMMENDATION when contained by ENTITY"
salience 256
when
$entity: RedactionEntity(entityType == EntityType.ENTITY)
$recommendation: RedactionEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized, !skipRemoveEntitiesContainedInLarger)
then
$recommendation.removeFromGraph();
retract($recommendation);
end
rule "remove Entity of lower rank, when equal boundaries and entityType"
salience 32
when
$higherRank: RedactionEntity($type: type, $entityType: entityType, $boundary: boundary)
$lowerRank: RedactionEntity($boundary == boundary, type != $type, entityType == $entityType, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !redaction)
then
$lowerRank.removeFromGraph();
retract($lowerRank);
end
// --------------------------------------- FileAttribute Rules -------------------------------------------------------------------
rule "remove duplicate FileAttributes"
salience 64
when
$fileAttribute: FileAttribute($label: label, $value: value)
$duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value)
then
retract($duplicate);
end
// --------------------------------------- local dictionary search -------------------------------------------------------------------
rule "run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
.forEach(entity -> {
entity.addEngine(Engine.RULE);
insert(entity);
});
end

View File

@ -3,7 +3,6 @@ package drools
import static java.lang.String.format;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.mapper.PropertiesMapper.parseImageType;
import java.util.List;
import java.util.LinkedList;
@ -245,7 +244,7 @@ rule "10: Redact row if row contains \"determination of residues\" and livestock
rule "11: Redact if CTL/* or BL/* was found"
when
$section: Section(excludesTables, (containsString("CTL/") || containsString("BL/")))
$section: Section(!hasTables, (containsString("CTL/") || containsString("BL/")))
then
entityCreationService.byString("CTL/", "must_redact", EntityType.ENTITY, $section)
.forEach(mustRedactEntity -> insert(mustRedactEntity));
@ -279,7 +278,7 @@ rule "12: Add CBI_author with \"et al.\" Regex"
rule "13: Add recommendation for Addresses in Test Organism sections"
when
$section: Section(excludesTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
$section: Section(!hasTables, containsString("Species") && containsString("Source") && !containsString("Species:") && !containsString("Source:"))
then
entityCreationService.lineAfterString("Source", "CBI_address", EntityType.RECOMMENDATION, $section)
.forEach(redactionEntity -> {
@ -292,7 +291,7 @@ rule "13: Add recommendation for Addresses in Test Organism sections"
rule "14: Add recommendation for Addresses in Test Animals sections"
when
$section: Section(excludesTables, containsString("Species:"), containsString("Source:"))
$section: Section(!hasTables, containsString("Species:"), containsString("Source:"))
then
entityCreationService.lineAfterString("Source:", "CBI_address", EntityType.RECOMMENDATION, $section)
.forEach(redactionEntity -> {
@ -386,7 +385,7 @@ rule "18: redact line between contact keywords"
rule "19: Redact AUTHOR(S)"
when
FileAttribute(placeholder == "{fileattributes.vertebrateStudy}", value == "true")
$section: Section(excludesTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"))
$section: Section(!hasTables, containsString("AUTHOR(S):"), containsString("COMPLETION DATE:"))
then
entityCreationService.betweenStrings("AUTHOR(S):", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
@ -400,7 +399,7 @@ rule "19: Redact AUTHOR(S)"
rule "20: Redact PERFORMING LABORATORY"
when
$section: Section(excludesTables, containsString("PERFORMING LABORATORY:"))
$section: Section(!hasTables, containsString("PERFORMING LABORATORY:"))
then
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "COMPLETION DATE:", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
@ -414,7 +413,7 @@ rule "20: Redact PERFORMING LABORATORY"
rule "21: Redact On behalf of Sequani Ltd.:"
when
$section: Section(excludesTables, containsString("On behalf of Sequani Ltd.: Name Title"))
$section: Section(!hasTables, containsString("On behalf of Sequani Ltd.: Name Title"))
then
entityCreationService.betweenStrings("On behalf of Sequani Ltd.: Name Title", "On behalf of", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
@ -428,7 +427,7 @@ rule "21: Redact On behalf of Sequani Ltd.:"
rule "22: Redact On behalf of Syngenta Ltd.:"
when
$section: Section(excludesTables, containsString("On behalf of Syngenta Ltd.: Name Title"))
$section: Section(!hasTables, containsString("On behalf of Syngenta Ltd.: Name Title"))
then
entityCreationService.betweenStrings("On behalf of Syngenta Ltd.: Name Title", "Study dates", "PII", EntityType.ENTITY, $section)
.forEach(authorEntity -> {
@ -506,7 +505,7 @@ rule "101: Redact CAS Number"
rule "102: Guidelines FileAttributes"
when
$section: Section(excludesTables, (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS")))
$section: Section(!hasTables, (containsString("DATA REQUIREMENT(S):") || containsString("TEST GUIDELINE(S):")) && (containsString("OECD") || containsString("EPA") || containsString("OPPTS")))
then
RedactionSearchUtility.findBoundariesByRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", $section.getTextBlock()).stream()
.map(boundary -> $section.getTextBlock().subSequence(boundary).toString())
@ -590,7 +589,7 @@ rule "Apply image recategorization"
ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type)
$image: Image($id == id)
then
$image.setImageType(parseImageType($imageType));
$image.setImageType(ImageType.fromString($imageType));
end
// --------------------------------------- merging rules -------------------------------------------------------------------