Resolve RED-7679 "4" #174

Closed
ali.oezyetimoglu1 wants to merge 3 commits from RED-7679-4 into master
11 changed files with 1355 additions and 5 deletions

View File

@ -425,6 +425,71 @@ public interface SemanticNode {
}
/**
 * Tests whether the words of this node's TextBlock include the given String, compared case-sensitively.
 *
 * @param word the exact word to look for
 * @return true if at least one word of this node's TextBlock equals {@code word}
 */
default boolean containsWord(String word) {

    for (String candidate : getTextBlock().getWords()) {
        if (candidate.equals(word)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks whether this SemanticNode contains exactly the provided String as a word, ignoring case.
 * Both the argument and the words of the TextBlock are lower-cased with {@code Locale.ENGLISH}, so the
 * result does not depend on the JVM default locale.
 *
 * @param word the word to look for; {@code null} never matches
 * @return true if any word of this node's TextBlock equals {@code word} ignoring case
 */
default boolean containsWordIgnoreCase(String word) {

    if (word == null) {
        return false;
    }
    // Lower-case the argument once instead of once per candidate word.
    String wanted = word.toLowerCase(Locale.ENGLISH);
    return getTextBlock().getWords().stream().anyMatch(candidate -> candidate.toLowerCase(Locale.ENGLISH).equals(wanted));
}
/**
 * Checks whether this SemanticNode contains any of the provided Strings as a word (case-sensitive).
 *
 * @param words the candidate words (varargs); an empty array never matches
 * @return true if at least one word of this node's TextBlock equals one of the provided Strings
 */
default boolean containsAnyWord(String... words) {

    // Walk the words of the TextBlock once and test membership in the arguments,
    // instead of requesting the word list again for every argument.
    return getTextBlock().getWords().stream().anyMatch(Arrays.asList(words)::contains);
}
/**
 * Checks whether this SemanticNode contains any of the provided Strings as a word, ignoring case.
 * Both sides are lower-cased with {@code Locale.ENGLISH}, so the result does not depend on the JVM
 * default locale.
 *
 * @param words the candidate words (varargs); an empty array never matches
 * @return true if at least one word of this node's TextBlock equals one of the provided Strings ignoring case
 */
default boolean containsAnyWordIgnoreCase(String... words) {

    // Normalise the arguments once up front.
    String[] wanted = Arrays.stream(words).map(word -> word.toLowerCase(Locale.ENGLISH)).toArray(String[]::new);
    return getTextBlock().getWords().stream().map(candidate -> candidate.toLowerCase(Locale.ENGLISH)).anyMatch(Arrays.asList(wanted)::contains);
}
/**
 * Checks whether this SemanticNode contains all the provided Strings as words (case-sensitive).
 *
 * @param words the words (varargs) that must all be present; an empty array yields true
 * @return true if every provided String equals some word of this node's TextBlock
 */
default boolean containsAllWords(String... words) {

    // The word list is requested once; containsAll checks each argument against it.
    return getTextBlock().getWords().containsAll(Arrays.asList(words));
}
/**
 * Checks whether this SemanticNode contains all the provided Strings as words, ignoring case.
 * Both sides are lower-cased with {@code Locale.ENGLISH}, so the result does not depend on the JVM
 * default locale.
 *
 * @param words the words (varargs) that must all be present; an empty array yields true
 * @return true if every provided String equals some word of this node's TextBlock ignoring case
 */
default boolean containsAllWordsIgnoreCase(String... words) {

    // Lower-case the words of the TextBlock once, then require every (lower-cased) argument to be among them.
    String[] textWords = getTextBlock().getWords().stream().map(candidate -> candidate.toLowerCase(Locale.ENGLISH)).toArray(String[]::new);
    return Arrays.stream(words).map(word -> word.toLowerCase(Locale.ENGLISH)).allMatch(Arrays.asList(textWords)::contains);
}
/**
* Checks whether this SemanticNode matches the provided regex pattern.
*

View File

@ -3,11 +3,14 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@ -40,6 +43,7 @@ public class AtomicTextBlock implements TextBlock {
//string coordinates
TextRange textRange;
String searchText;
List<String> words;
List<Integer> lineBreaks;
//position coordinates
@ -114,6 +118,20 @@ public class AtomicTextBlock implements TextBlock {
return new TextRange(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start());
}
/**
 * Returns the word tokens of this block's search text, computing them on first access and caching the result.
 * The text is segmented with an English word {@link BreakIterator}; segments that consist only of whitespace
 * are not words and are skipped, punctuation segments are kept.
 *
 * @return the cached list of word tokens in text order
 */
@Override
public List<String> getWords() {

    if (words == null) {
        BreakIterator boundaries = BreakIterator.getWordInstance(Locale.ENGLISH);
        boundaries.setText(searchText);
        // Fill a local list first so the cache field never refers to a half-built list.
        List<String> tokens = new ArrayList<>();
        int start = boundaries.first();
        for (int end = boundaries.next(); end != BreakIterator.DONE; start = end, end = boundaries.next()) {
            String token = searchText.substring(start, end);
            // The word iterator also reports the gaps between words as segments.
            if (!token.isBlank()) {
                tokens.add(token);
            }
        }
        words = tokens;
    }
    return words;
}
@Override
public List<AtomicTextBlock> getAtomicTextBlocks() {

View File

@ -7,6 +7,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Map;
import java.util.stream.Stream;
@ -86,6 +87,13 @@ public class ConcatenatedTextBlock implements TextBlock {
}
/**
 * Collects the words of every contained atomic text block, in block order, into one unmodifiable list.
 *
 * @return the concatenated words of all atomic text blocks
 */
@Override
public List<String> getWords() {

    return atomicTextBlocks.stream()
            .flatMap(atomicTextBlock -> atomicTextBlock.getWords().stream())
            .toList();
}
@Override
public int numberOfLines() {

View File

@ -18,6 +18,7 @@ public interface TextBlock extends CharSequence {
String getSearchText();
/**
 * Returns the words contained in this text block.
 *
 * @return the words of this text block as a list of Strings
 */
List<String> getWords();
List<AtomicTextBlock> getAtomicTextBlocks();

View File

@ -58,6 +58,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startTextRanges = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock());
List<TextRange> stopTextRanges = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock());
@ -67,6 +69,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock());
@ -76,6 +80,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startBoundaries = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock());
@ -90,6 +96,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock());
@ -104,6 +112,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startBoundaries = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock());
@ -118,6 +128,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock());
@ -132,6 +144,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startBoundaries = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock());
@ -150,6 +164,8 @@ public class EntityCreationService {
public Stream<TextEntity> betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(start, stop);
List<TextRange> startBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock());
List<TextRange> stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock());
@ -168,6 +184,8 @@ public class EntityCreationService {
public Stream<TextEntity> shortestBetweenAnyString(List<String> starts, List<String> stops, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(starts, stops);
List<TextRange> startTextRanges = RedactionSearchUtility.findTextRangesByList(starts, node.getTextBlock());
List<TextRange> stopTextRanges = RedactionSearchUtility.findTextRangesByList(stops, node.getTextBlock());
@ -177,6 +195,8 @@ public class EntityCreationService {
public Stream<TextEntity> shortestBetweenAnyStringIgnoreCase(List<String> starts, List<String> stops, String type, EntityType entityType, SemanticNode node) {
checkIfBothStartAndEndAreEmpty(starts, stops);
List<TextRange> startTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(starts, node.getTextBlock());
List<TextRange> stopTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(stops, node.getTextBlock());
@ -185,6 +205,8 @@ public class EntityCreationService {
public Stream<TextEntity> shortestBetweenAnyStringIgnoreCase(List<String> starts, List<String> stops, String type, EntityType entityType, SemanticNode node, int limit) {
checkIfBothStartAndEndAreEmpty(starts, stops);
List<TextRange> startTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(starts, node.getTextBlock());
List<TextRange> stopTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(stops, node.getTextBlock());
@ -219,9 +241,6 @@ public class EntityCreationService {
public Stream<TextEntity> betweenTextRanges(List<TextRange> startBoundaries, List<TextRange> stopBoundaries, String type, EntityType entityType, SemanticNode node, int limit) {
if (startBoundaries.isEmpty() || stopBoundaries.isEmpty()) {
return Stream.empty();
}
List<TextRange> entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries);
return entityBoundaries.stream()
.filter(range -> (limit == 0 || range.length() <= limit))
@ -263,6 +282,18 @@ public class EntityCreationService {
}
/**
 * Rejects a search where neither a start nor a stop string is given.
 * A {@code null} or empty String counts as "not given". Wrapping the raw Strings with {@code List.of(...)}
 * would always produce a one-element list (or fail on {@code null}), so the values are converted to
 * empty lists explicitly before delegating to the list-based check.
 *
 * @param start the start string, may be {@code null} or empty
 * @param end the stop string, may be {@code null} or empty
 * @throws IllegalArgumentException if both {@code start} and {@code end} are {@code null} or empty
 */
private void checkIfBothStartAndEndAreEmpty(String start, String end) {

    List<String> starts = (start == null || start.isEmpty()) ? List.<String>of() : List.of(start);
    List<String> ends = (end == null || end.isEmpty()) ? List.<String>of() : List.of(end);
    checkIfBothStartAndEndAreEmpty(starts, ends);
}
/**
 * Rejects a search where neither start nor stop values are given.
 *
 * @param start the start values, may be {@code null}
 * @param end the stop values, may be {@code null}
 * @throws IllegalArgumentException if both lists are {@code null} or empty
 */
private <T> void checkIfBothStartAndEndAreEmpty(List<T> start, List<T> end) {

    boolean startMissing = start == null || start.isEmpty();
    boolean endMissing = end == null || end.isEmpty();
    if (startMissing && endMissing) {
        throw new IllegalArgumentException("Start and end values are empty!");
    }
}
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())

View File

@ -0,0 +1,125 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.Mockito.when;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;
/**
 * Manually triggered (currently disabled) integration test that runs structure analysis, rule-based
 * analysis and annotation for the table demo PDF using the {@code table_demo} rule files, and writes
 * the annotated document to the temporary directory for inspection.
 */
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
@Import(AnalysisTest.RedactionIntegrationTestConfiguration.class)
public class AnalysisTest extends AbstractRedactionIntegrationTest {

    private static final String RULES = loadFromClassPath("drools/table_demo.drl");
    private static final String COMPONENT_RULES = loadFromClassPath("drools/table_demo_components.drl");


    /**
     * Uploads the demo PDF, runs the full analysis pipeline and stores the annotated result as
     * {@code Documine.pdf} in the temporary directory. The test makes no assertions.
     */
    @Test
    @Disabled
    public void analyzeTableDemoFile() {

        AnalyzeRequest request = uploadFileToStorage("files/TableDemo/Table_examples.pdf");
        System.out.println("Start Full integration test");
        analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
        System.out.println("Finished structure analysis");
        // The returned result is not inspected; the call is made for its effect on the stored analysis data.
        analyzeService.analyze(request);
        System.out.println("Finished analysis");

        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());

        String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
            fileOutputStream.write(annotateResponse.getDocument());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }


    /**
     * Test configuration: excludes the RabbitMQ and storage auto-configurations and provides a
     * file-system backed {@link StorageService} instead.
     */
    @Configuration
    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
    @Import(LayoutParsingServiceProcessorConfiguration.class)
    @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
    static class RedactionIntegrationTestConfiguration {

        @Bean
        @Primary
        public StorageService inmemoryStorage() {

            return new FileSystemBackedStorageService(ObjectMapperFactory.create());
        }

    }


    /**
     * Sets the tenant and stubs the rules and dictionary clients with the demo rule files and the
     * test dictionaries before each test.
     */
    @BeforeEach
    public void stubClients() {

        TenantContext.setTenantId("documine");

        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(System.currentTimeMillis());
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(COMPONENT_RULES));

        loadDictionaryForTest();
        loadTypeForTest();
        loadNerForTest();

        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
        // NOTE(review): dossierTemplateId is set to TEST_DOSSIER_ID below - confirm this is intended and not TEST_DOSSIER_TEMPLATE_ID.
        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
                .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
                .type(DOSSIER_REDACTIONS_INDICATOR)
                .dossierTemplateId(TEST_DOSSIER_ID)
                .hexColor("#ffe187")
                .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .build()));

        mockDictionaryCalls(null);

        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
    }

}

View File

@ -1099,7 +1099,7 @@ rule "ETC.2.1: Redact signatures (vertebrate study)"
// Rule unit: ETC.3
rule "ETC.3.0: Redact logos (vertebrate study)"
rule "ETC.3.0: Redact logos (non vertebrate study)"
when
not FileAttribute(label == "Vertebrate Study", value == "Yes")
$logo: Image(imageType == ImageType.LOGO)
@ -1107,7 +1107,7 @@ rule "ETC.3.0: Redact logos (vertebrate study)"
$logo.redact("ETC.3.0", "Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002");
end
rule "ETC.3.1: Redact logos (non vertebrate study)"
rule "ETC.3.1: Redact logos (vertebrate study)"
when
FileAttribute(label == "Vertebrate Study", value == "Yes")
$logo: Image(imageType == ImageType.LOGO)

View File

@ -0,0 +1,448 @@
package drools
import static java.lang.String.format;
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch;
import java.util.List;
import java.util.LinkedList;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.Collection;
import java.util.stream.Stream;
import java.util.Optional;
import com.iqser.red.service.redaction.v1.server.model.document.*;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.*;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule
import com.iqser.red.service.redaction.v1.server.model.document.nodes.*;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.*;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus
global Document document
global EntityCreationService entityCreationService
global ManualChangesApplicationService manualChangesApplicationService
global Dictionary dictionary
//------------------------------------ queries ------------------------------------
// Query matching every FileAttribute fact; each match is bound to $fileAttribute.
query "getFileAttributes"
$fileAttribute: FileAttribute()
end
//---------------------------------------------------------------------------
// Derives "OECD Number" file attributes from the document text.
// Fires when no FileAttribute labelled "OECD Number" with one of the listed values exists and a Section
// contains one of the guideline keywords together with "OECD", "EPA" or "OPPTS".
// Two case-insensitive regexes are run on the section text; capture group 1 (a three-digit number starting
// with 4) of every match is inserted as a new FileAttribute with label "OECD Number".
rule "TAB.0.0: Study Type File Attribute"
when
not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487"))
$section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):")
&& containsAnyString("OECD", "EPA", "OPPTS"))
then
RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1 ,$section.getTextBlock()).stream()
.map(boundary -> $section.getTextBlock().subSequence(boundary).toString())
.map(value -> FileAttribute.builder().label("OECD Number").value(value).build())
.forEach(fileAttribute -> insert(fileAttribute));
RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()).stream()
.map(boundary -> $section.getTextBlock().subSequence(boundary).toString())
.map(value -> FileAttribute.builder().label("OECD Number").value(value).build())
.forEach(fileAttribute -> insert(fileAttribute));
end
// Creates guideline entities in every Section that contains one of the guideline keywords together with
// "OECD", "EPA" or "OPPTS". Five regexes are applied; depending on the regex and the selected capture group
// the match becomes an "oecd_guideline_number" or an "oecd_guideline_year" entity, each applied with rule
// id "TAB.0.1".
rule "TAB.0.1: Guidelines"
when
$section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS"))
then
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no. found"));
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section)
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline number found"));
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
end
// Full table extraction. Requires an "OECD Number" file attribute with value "425" and a Section whose
// headline contains "Full Table". Tables are taken from all sub-nodes of the section's parent; every
// non-header cell of such a table becomes a "guideline_deviation" entity.
rule "TAB.1.0: Full Table extraction (Guideline Deviation)"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Full Table"))
$table: Table() from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
$tableCell: TableCell(!header) from $table.streamTableCells().toList()
then
entityCreationService.bySemanticNode($tableCell, "guideline_deviation", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.1.0", "full table extracted"));
end
// Row extraction. Requires OECD Number "425" and a Section whose headline contains
// "Individual Rows Extraction". Among the section's children, tables with an "Animal No." header holding
// "120-2" or "120-5" are selected; $row is bound from each "Animal No." cell containing one of these
// values, and every cell of the table in that row becomes a "clinical_signs" entity.
rule "TAB.2.0: Individual row extraction (Clinical Signs)"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Individual Rows Extraction"))
$table: Table(hasHeader("Animal No."), (hasRowWithHeaderAndAnyValue("Animal No.", List.of("120-2", "120-5")))) from $section.streamChildren().toList()
TableCell($row: row, containsAnyString("120-2", "120-5")) from $table.streamTableCellsWithHeader("Animal No.").toList()
$tableCell: TableCell($row == row) from $table.streamTableCells().toList()
then
entityCreationService.bySemanticNode($tableCell, "clinical_signs", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.2.0", "Individual row based on animal number"));
end
// Column extraction. Requires OECD Number "425" and a Section whose headline contains "Individual Column".
// For each child table with a "Sex" header, every cell under that header becomes a "dosages" entity.
// NOTE(review): the rule title mentions "Strain" while the header used is "Sex" and the entity type is
// "dosages" - confirm the intended naming.
rule "TAB.3.0: Individual column extraction (Strain)"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Individual Column"))
$table: Table(hasHeader("Sex")) from $section.streamChildren().toList()
then
$table.streamTableCellsWithHeader("Sex")
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dosages", EntityType.ENTITY))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header"));
end
// Combined column extraction (Sex + Dosage). Requires OECD Number "425" and a Section whose headline
// contains "Combined Columns". Tables are taken from all sub-nodes of the section's parent and must have the
// headers "Sex" and "Dosage (mg/kg bw)". For every "Sex" cell containing the word "Male" (ignoring case),
// that cell and the cells streamed for header "Dosage" in the same row become "combined_male_dosage" entities.
// NOTE(review): the table is filtered with header "Dosage (mg/kg bw)" but cells are streamed with "Dosage" -
// verify that streamTableCellsWithHeader matches this shorter header text.
rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Combined Columns"))
$table: Table(hasHeader("Sex"), hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
$maleCells: TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList()
$dosageCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList()
then
entityCreationService.bySemanticNode($maleCells, "combined_male_dosage", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
entityCreationService.bySemanticNode($dosageCells, "combined_male_dosage", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
end
// Combined column extraction (Sex + Mortality). Requires OECD Number "425" and a Section whose headline
// contains "Combined Columns". Tables are taken from all sub-nodes of the section's parent and must have the
// headers "Sex" and "Mortality". For every "Sex" cell containing the word "Female" (ignoring case), that cell
// and the "Mortality" cells in the same row become "combined_female_mortality" entities.
rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Combined Columns"))
$table: Table(hasHeader("Sex"), hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
$femaleCells: TableCell($row: row, containsAnyWordIgnoreCase("Female")) from $table.streamTableCellsWithHeader("Sex").toList()
$mortalityCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Mortality").toList()
then
entityCreationService.bySemanticNode($femaleCells, "combined_female_mortality", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
entityCreationService.bySemanticNode($mortalityCells, "combined_female_mortality", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
end
// Targeted cell extraction. Requires OECD Number "425" and a Section whose headline contains
// "Value Extraction". In a child table with a "Mortality" header, a "Sex" value of "male"/"Male" and a
// "Mortality" value "Survived", $row is bound from a "Sex" cell containing the word "Male" (ignoring case);
// the "Mortality" cell of that row must contain "Survived" (ignoring case). The cells streamed for header
// "Dosage" in that row become "doses_mg_kg_bw" entities.
rule "TAB.5.0: Targeted cell extraction"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Value Extraction"))
$table: Table(hasHeader("Mortality"), hasRowWithHeaderAndAnyValue("Sex", List.of("male", "Male")), hasRowWithHeaderAndValue("Mortality", "Survived")) from $section.streamChildren().toList()
TableCell(containsWordIgnoreCase("Male"), $row: row) from $table.streamTableCellsWithHeader("Sex").toList()
TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Mortality").toList()
$dosageCell: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList()
then
entityCreationService.bySemanticNode($dosageCell,"doses_mg_kg_bw", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.5.0", "Dosage found in row with survived male"));
end
// Targeted cell extraction on a "Group 2" table; unlike the other TAB rules this one has no OECD Number
// condition. Requires a Section whose headline contains "Advanced Table Extraction" and which contains all of
// "female", "Female", "Survived" and "Group 2". All three cell patterns stream the cells under header
// "Group 2": one containing the word "Female" (ignoring case) binds $row, one in the same row must contain
// "Survived" (ignoring case), and each cell of that row under the header becomes an
// "experiment_female_survived" entity.
// NOTE(review): the title says "Experimental Stop date" while the apply message says "experimental start
// date" - confirm which is intended.
rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)"
when
$section: Section(getHeadline().containsString("Advanced Table Extraction"), containsAllStrings("female", "Female", "Survived", "Group 2"))
$table: Table(hasHeader("Group 2")) from $section.streamChildren().toList()
TableCell(containsWordIgnoreCase("Female"), $row: row) from $table.streamTableCellsWithHeader("Group 2").toList()
TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList()
$femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList()
then
entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date"));
end
// Entity-based cell extraction. Requires OECD Number "425" and a Section whose headline contains
// "Entity-Based". For each table below the section, $col is bound from a header cell containing "Title" and
// $row from any cell that has entities of type "vertebrate"; the cell at ($col, $row) becomes a
// "study_design" entity.
rule "TAB.7.0: Indicator (Species)"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Entity-Based"))
$table: Table() from $section.streamAllSubNodesOfType(NodeType.TABLE).toList()
TableCell(isHeader(), containsString("Title"), $col: col) from $table.streamTableCells().toList()
TableCell(hasEntitiesOfType("vertebrate"), $row: row) from $table.streamTableCells().toList()
$cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList()
then
entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY)
.ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found"));
end
//------------------------------------ Manual redaction rules ------------------------------------
// Rule unit: MAN.0
// Applies an approved manual resize to the TextEntity matching its annotation id.
// The "not" pattern restricts the rule to a resize for which no other ManualResizeRedaction with the same
// annotation id has an earlier requestDate. After resizing, the resize fact is retracted and the entity and
// all nodes it intersects are updated so dependent rules are re-evaluated.
rule "MAN.0.0: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate)
not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate))
$entityToBeResized: TextEntity(matchesAnnotationId($id))
then
manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
end
// Image counterpart of the manual resize rule: applies an approved manual resize to the Image whose id
// equals the annotation id. The "not" pattern restricts the rule to a resize for which no other
// ManualResizeRedaction with the same annotation id has an earlier requestDate. Afterwards the resize fact is
// retracted and the image and its parent node are updated.
rule "MAN.0.1: Apply manual resize redaction"
salience 128
when
$resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate)
not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate))
$imageToBeResized: Image(id == $id)
then
manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($imageToBeResized);
update($imageToBeResized.getParent());
end
// Rule unit: MAN.1
// MAN.1.0 - records an approved id removal in the manual overwrite of the TextEntity it refers
// to, then consumes the removal fact and refreshes the entity and its intersecting nodes.
rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
    salience 128
    when
        $removal: IdRemoval($annotationId: annotationId, status == AnnotationStatus.APPROVED)
        $target: TextEntity(matchesAnnotationId($annotationId))
    then
        $target.getManualOverwrite().addChange($removal);
        update($target);
        retract($removal);
        $target.getIntersectingNodes().forEach(node -> update(node));
end
// MAN.1.1 - records an approved id removal in the manual overwrite of the Image with the same
// id, then consumes the removal fact and refreshes the image and its parent node.
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
    salience 128
    when
        $removal: IdRemoval($annotationId: annotationId, status == AnnotationStatus.APPROVED)
        $targetImage: Image($annotationId == id)
    then
        $targetImage.getManualOverwrite().addChange($removal);
        update($targetImage);
        retract($removal);
        update($targetImage.getParent());
end
// Rule unit: MAN.2
// MAN.2.0 - adds an approved force redaction to the manual overwrite of the matching
// TextEntity, refreshes the entity and its intersecting nodes, then consumes the request.
rule "MAN.2.0: Apply force redaction"
    salience 128
    when
        $forceRequest: ManualForceRedaction($annotationId: annotationId, status == AnnotationStatus.APPROVED)
        $target: TextEntity(matchesAnnotationId($annotationId))
    then
        $target.getManualOverwrite().addChange($forceRequest);
        update($target);
        $target.getIntersectingNodes().forEach(node -> update(node));
        retract($forceRequest);
end
// MAN.2.1 - adds an approved force redaction to the manual overwrite of the Image with the
// same id, refreshes the image and its parent, then consumes the request.
rule "MAN.2.1: Apply force redaction to images"
    salience 128
    when
        $forceRequest: ManualForceRedaction($annotationId: annotationId, status == AnnotationStatus.APPROVED)
        $targetImage: Image(id == $annotationId)
    then
        $targetImage.getManualOverwrite().addChange($forceRequest);
        update($targetImage);
        update($targetImage.getParent());
        retract($forceRequest);
end
// Rule unit: MAN.3
// MAN.3.0 - recategorizes a TextEntity whose current type differs from the requested one.
// Only fires for a request when no other recategorization of the same annotation id has an
// earlier request date.
rule "MAN.3.0: Apply entity recategorization"
    salience 128
    when
        $request: ManualRecategorization($annotationId: annotationId, $newType: type, status == AnnotationStatus.APPROVED, $requestedAt: requestDate)
        not ManualRecategorization($annotationId == annotationId, requestDate.isBefore($requestedAt))
        $target: TextEntity(matchesAnnotationId($annotationId), type != $newType)
    then
        $target.getIntersectingNodes().forEach(node -> update(node));
        manualChangesApplicationService.recategorize($target, $request);
        retract($request);
        // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication.
        retract($target);
end
// MAN.3.1 - recategorization request whose type equals the entity's current type: the change is
// only recorded in the entity's manual overwrite and the request is consumed.
rule "MAN.3.1: Apply entity recategorization of same type"
    salience 128
    when
        $request: ManualRecategorization($annotationId: annotationId, $sameType: type, status == AnnotationStatus.APPROVED, $requestedAt: requestDate)
        not ManualRecategorization($annotationId == annotationId, requestDate.isBefore($requestedAt))
        $target: TextEntity(matchesAnnotationId($annotationId), type == $sameType)
    then
        $target.getManualOverwrite().addChange($request);
        retract($request);
end
// MAN.3.2 - applies an approved recategorization to the Image with the same id, provided no
// recategorization for that id has an earlier request date; refreshes image and parent.
rule "MAN.3.2: Apply image recategorization"
    salience 128
    when
        $request: ManualRecategorization($annotationId: annotationId, status == AnnotationStatus.APPROVED, $requestedAt: requestDate)
        not ManualRecategorization($annotationId == annotationId, requestDate.isBefore($requestedAt))
        $targetImage: Image($annotationId == id)
    then
        manualChangesApplicationService.recategorize($targetImage, $request);
        update($targetImage);
        update($targetImage.getParent());
        retract($request);
end
// Rule unit: MAN.4
// MAN.4.0 - records an approved legal basis change in the manual overwrite of the Image with
// the same id. The change fact is neither retracted nor is the image updated here.
rule "MAN.4.0: Apply legal basis change"
    salience 128
    when
        $change: ManualLegalBasisChange($annotationId: annotationId, status == AnnotationStatus.APPROVED)
        $imageToBeChanged: Image($annotationId == id)
    then
        $imageToBeChanged.getManualOverwrite().addChange($change);
end
// MAN.4.1 - records an approved legal basis change in the manual overwrite of the matching
// TextEntity. The change fact is neither retracted nor is the entity updated here.
rule "MAN.4.1: Apply legal basis change"
    salience 128
    when
        $change: ManualLegalBasisChange($annotationId: annotationId, status == AnnotationStatus.APPROVED)
        $target: TextEntity(matchesAnnotationId($annotationId))
    then
        $target.getManualOverwrite().addChange($change);
end
//------------------------------------ Entity merging rules ------------------------------------
// Rule unit: X.0
// X.0.0 - among active entities of identical type and entityType, removes the one that is
// contained by the other, unless the contained entity carries manual changes.
rule "X.0.0: Remove Entity contained by Entity of same type"
    salience 65
    when
        $outer: TextEntity($sharedType: type, $sharedEntityType: entityType, active())
        $inner: TextEntity(containedBy($outer), type == $sharedType, entityType == $sharedEntityType, this != $outer, !hasManualChanges(), active())
    then
        $inner.remove("X.0.0", "remove Entity contained by Entity of same type");
        retract($inner);
end
// Rule unit: X.2
// X.2.0 - removes an active ENTITY or HINT that is contained by an active FALSE_POSITIVE of the
// same type, unless the contained entity carries manual changes. The intersecting nodes are
// updated before the entity is removed and retracted.
rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE"
    salience 64
    when
        $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active())
        $entity: TextEntity(containedBy($falsePositive), type == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active())
    then
        $entity.getIntersectingNodes().forEach(node -> update(node));
        $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
        // statement terminator added; every sibling rule terminates its retract call
        retract($entity);
end
// Rule unit: X.3
// X.3.0 - removes an active RECOMMENDATION that is contained by an active FALSE_RECOMMENDATION
// of the same type, unless the recommendation carries manual changes.
rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"
    salience 64
    when
        $blocker: TextEntity($sharedType: type, entityType == EntityType.FALSE_RECOMMENDATION, active())
        $candidate: TextEntity(containedBy($blocker), type == $sharedType, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active())
    then
        $candidate.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION");
        retract($candidate);
end
// Rule unit: X.4
// X.4.0 - removes an active RECOMMENDATION that intersects an active ENTITY or HINT of the same
// type (unless it has manual changes); the recommendation's engines are first added to the
// surviving entity.
rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"
    salience 256
    when
        $survivor: TextEntity($sharedType: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active())
        $candidate: TextEntity(intersects($survivor), type == $sharedType, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active())
    then
        $survivor.addEngines($candidate.getEngines());
        $candidate.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type");
        retract($candidate);
end
// Rule unit: X.5
// X.5.0 - removes an active RECOMMENDATION contained by any active ENTITY or HINT, regardless
// of type, unless the recommendation carries manual changes.
rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY"
    salience 256
    when
        $container: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active())
        $candidate: TextEntity(containedBy($container), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active())
    then
        $candidate.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY");
        retract($candidate);
end
// Rule unit: X.7
// X.7.0 - removes and retracts every Image whose type is not OCR and that has no manual changes.
rule "X.7.0: remove all images"
    salience 512
    when
        $unwantedImage: Image(imageType != ImageType.OCR, !hasManualChanges())
    then
        $unwantedImage.remove("X.7.0", "remove all images");
        retract($unwantedImage);
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1
// FA.1.0 - retracts a FileAttribute when another FileAttribute fact with the same label and
// value exists.
rule "FA.1.0: Remove duplicate FileAttributes"
    salience 64
    when
        $kept: FileAttribute($sharedLabel: label, $sharedValue: value)
        $redundant: FileAttribute(this != $kept, label == $sharedLabel, value == $sharedValue)
    then
        retract($redundant);
end
//------------------------------------ Local dictionary search rules ------------------------------------
// Rule unit: LDS.0
// LDS.0.0 - in agenda group LOCAL_DICTIONARY_ADDS (salience -999): for every dictionary model
// that has local entries with matched rules, runs its local search over the document, creating
// RECOMMENDATION entities, and attaches the matched rules stored under each entity's value.
rule "LDS.0.0: Run local dictionary search"
    agenda-group "LOCAL_DICTIONARY_ADDS"
    salience -999
    when
        $model: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels()
    then
        entityCreationService.bySearchImplementation($model.getLocalSearch(), $model.getType(), EntityType.RECOMMENDATION, document)
            .forEach(found -> {
                Collection<MatchedRule> rulesForValue = $model.getLocalEntriesWithMatchedRules().get(found.getValue());
                found.addMatchedRules(rulesForValue);
            });
end

View File

@ -0,0 +1,134 @@
package drools
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
global Section section
// --------------------------------------- Your rules below this line--------------------------------------------------
// 0a - derives "OECD Number" file attributes via regular expressions. Fires for sections whose
// text contains one of the guideline header phrases and one of OECD/EPA/OPPTS, guarded by
// !fileAttributeContainsAnyOf("OECD Number", ...) for the listed numbers.
rule "0a: Study Type File Attribute"
    when
        Section(
            !fileAttributeContainsAnyOf("OECD Number","402","403","404","405","425","429","436","438","439","471","487")
            && (text.contains("DATA REQUIREMENT") || text.contains("TEST GUIDELINE") || text.contains("MÉTODO(S) DE REFERÊNCIA(S):"))
            && (text.contains("OECD") || text.contains("EPA") || text.contains("OPPTS"))
        )
    then
        // number that follows "OECD", optionally after "Number ", "Procedure " or "Guideline "
        section.addFileAttribute("OECD Number", "(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", true, 1);
        // number that follows "Method " and is later followed by a four-digit year in parentheses
        section.addFileAttribute("OECD Number", "(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", true, 1);
end
// 1 - redacts OECD guideline numbers ("oecd_guideline_number") and years ("oecd_guideline_year")
// by regular expression in sections whose text contains one of the guideline header phrases and
// one of OECD/EPA/OPPTS.
rule "1: Guidelines"
    when
        Section(
            (text.contains("DATA REQUIREMENT") || text.contains("TEST GUIDELINE") || text.contains("MÉTODO(S) DE REFERÊNCIA(S):"))
            && (text.contains("OECD") || text.contains("EPA") || text.contains("OPPTS"))
        )
    then
        // "OECD ... <number>" -> number (group 1)
        section.redactByRegEx("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", true, 1, "oecd_guideline_number", 1, "OECD Guideline no. found", "n-a");
        // "OECD ... <number> (<year>)" -> year (group 2)
        section.redactByRegEx("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", true, 2, "oecd_guideline_year", 1, "OECD Guideline year found", "n-a");
        // "OECD ... (<year>) <number>" -> year (group 1)
        section.redactByRegEx("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", true, 1, "oecd_guideline_year", 1, "OECD Guideline year found", "n-a");
        // "OECD ... Method <number> ... <year>)" -> number (group 1), then year (group 2)
        section.redactByRegEx("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", true, 1, "oecd_guideline_number", 1, "OECD Guideline number found", "n-a");
        section.redactByRegEx("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", true, 2, "oecd_guideline_year", 1, "OECD Guideline year found", "n-a");
end
// 2 - OECD 425, headline word "Full Table", table header "Sex": redacts the section text without
// its headline as "guideline_deviation".
rule "2: Full Table extraction (Guideline Deviation)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("OECD Number","425")
                && headlineContainsWord("Full Table")
                && hasTableHeader("Sex"))
    then
        section.redactSectionTextWithoutHeadLine("guideline_deviation",2,"Full table extraction into guideline deviation","n-a");
end
// 3 - OECD 425, headline word "Individual Rows", table header "Animal No." with a row equal to
// "120-2" or "120-5": redacts the section text without its headline as "clinical_signs".
rule "3: Individual row extraction (Clinical Signs)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("OECD Number","425")
                && headlineContainsWord("Individual Rows")
                && hasTableHeader("Animal No.")
                && (rowEquals("Animal No.","120-2") || rowEquals("Animal No.","120-5")))
    then
        section.redactSectionTextWithoutHeadLine("clinical_signs",3,"Individual row based on animal number","n-a");
end
// 4 - OECD 425, headline word "Individual Column", table header "Sex": redacts the "Sex" cells
// as type "dosages".
rule "4: Individual column extraction (Strain)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("OECD Number","425")
                && headlineContainsWord("Individual Column")
                && hasTableHeader("Sex"))
    then
        section.redactCell("Sex",4,"dosages",false,"Individual column based on column header","n-a");
end
// 5 - OECD 425, headline word "Combined Columns", table headers "Mortality" and
// "Dosage (mg/kg bw)": redacts both columns as "dose_mortality" / "dose_mortality_dose".
rule "5: Dose Mortality"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("OECD Number","425")
                && headlineContainsWord("Combined Columns")
                && hasTableHeader("Mortality")
                && hasTableHeader("Dosage (mg/kg bw)"))
    then
        section.redactCell("Mortality",5,"dose_mortality",false,"Dose Mortality found.","n-a");
        section.redactCell("Dosage (mg/kg bw)",5,"dose_mortality_dose",false,"Dose Mortality dose found.","n-a");
end
// 6 - OECD 425, headline word "Value Extraction", table header "Mortality", a "Sex" row equal to
// "male"/"Male" and a "Mortality" row equal to "Survived": redacts the "Treatment start" cell as
// "experimental_start_date".
// NOTE(review): the condition tests for male rows while the reason text speaks of female
// deaths - confirm which one is intended.
rule "6: targeted cell extraction (Experimental Start date)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("OECD Number","425")
                && headlineContainsWord("Value Extraction")
                && hasTableHeader("Mortality")
                && (rowEquals("Sex","male") || rowEquals("Sex","Male"))
                && rowEquals("Mortality","Survived"))
    then
        section.redactCell("Treatment start",6,"experimental_start_date",false,"Female deaths date to experimental start date","n-a");
end
// 7 - sections inside a table whose search text contains "female"/"Female" and "Survived":
// calls redactCellBelow for type "experimental_end_date" with the headers "Sex" and "Group 2".
// NOTE(review): the reason text mentions the experimental start date although the type is the
// end date - confirm.
rule "7: targeted cell extraction (Experimental Stop date)"
    when
        Section(isInTable()
                && (searchText.contains("female") || searchText.contains("Female"))
                && searchText.contains("Survived"))
    then
        section.redactCellBelow(7,"experimental_end_date",true,false,"Female deaths date to experimental start date","n-a", "Sex", "Group 2");
end
// 8 - OECD 425, headline word "Entity-Based", section matches type "vertebrates": redacts the
// "Title" cell as "study_design".
rule "8: Indicator (Species)"
    when
        Section(fileAttributeByLabelEqualsIgnoreCase("OECD Number","425")
                && headlineContainsWord("Entity-Based")
                && matchesType("vertebrates"))
    then
        section.redactCell("Title",8,"study_design",false,"Vertebrate study found","n-a");
end

View File

@ -0,0 +1,520 @@
package drools
import static java.lang.String.format;
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch;
import java.util.List;
import java.util.LinkedList;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.Collection;
import java.util.stream.Stream;
import java.util.Optional;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
global ComponentCreationService componentCreationService
//------------------------------------ queries ------------------------------------
// Query returning every FileAttribute fact in working memory, bound as $fileAttribute.
query "getFileAttributes"
$fileAttribute: FileAttribute()
end
// Query returning every Component fact in working memory, bound as $component.
query "getComponents"
$component: Component()
end
//------------------------------------ Guideline mapping object ------------------------------------
// Declared fact type that maps an OECD guideline number and year to a guideline description.
// The rules in this file construct it positionally as (number, year, guideline).
declare GuidelineMapping
number: String
year: String
guideline: String
end
//------------------------------------ Default Components rules ------------------------------------
// StudyTitle.0.0 - collects all "title" entities and passes them to firstOrElse with "" as the
// fallback value for component "Study_Title".
rule "StudyTitle.0.0: First Title found"
    when
        $titles: List() from collect (Entity(type == "title"))
    then
        componentCreationService.firstOrElse("StudyTitle.0.0", "Study_Title", $titles, "");
end
// PerformingLaboratory.1.0 - pairs a laboratory name with the laboratory country in the same
// containing node whose start offset is closest to the name (the `not` pattern rejects a pair
// when another country in that node is strictly closer), and creates "<name>, <country>".
rule "PerformingLaboratory.1.0: Performing Laboratory name and country found in same section"
    when
        $labName: Entity(type == "laboratory_name", $sharedNode: containingNode)
        $labCountry: Entity(type == "laboratory_country", containingNode == $sharedNode)
        not Entity(type == "laboratory_country", containingNode == $sharedNode, Math.abs($labName.startOffset - startOffset) < Math.abs($labName.startOffset - $labCountry.startOffset))
    then
        componentCreationService.create("PerformingLaboratory.1.0", "Performing_Laboratory", $labName.getValue() + ", " + $labCountry.getValue(), "Laboratory name and country found!", List.of($labName, $labCountry));
end
// PerformingLaboratory.2.0 - laboratory name whose containing node holds no laboratory country:
// creates the component from the name alone.
rule "PerformingLaboratory.2.0: Performing Laboratory name but no country found in same section"
    when
        $labName: Entity(type == "laboratory_name", $nameNode: containingNode)
        not Entity(type == "laboratory_country", containingNode == $nameNode)
    then
        componentCreationService.create("PerformingLaboratory.2.0", "Performing_Laboratory", $labName.getValue(), "Only laboratory name found!", List.of($labName));
end
// PerformingLaboratory.0.2 - fallback at salience -1: creates an empty "Performing_Laboratory"
// component when no Component fact with that name exists.
rule "PerformingLaboratory.0.2: Performing Laboratory not found"
    salience -1
    when
        not Component(name == "Performing_Laboratory")
    then
        componentCreationService.create("PerformingLaboratory.0.2", "Performing_Laboratory", "", "fallback");
end
// ReportNumber.0.0 - collects all "report_number" entities and passes them to firstOrElse with
// "" as the fallback value for component "Report_Number".
rule "ReportNumber.0.0: First Report number found"
    when
        $reportNumbers: List() from collect (Entity(type == "report_number"))
    then
        componentCreationService.firstOrElse("ReportNumber.0.0", "Report_Number", $reportNumbers, "");
end
// GLPStudy.0.0 - creates "GLP_Study" = "Yes" when at least one "glp_study" entity exists; the
// collected entities are passed along as references.
rule "GLPStudy.0.0: GLP Study found"
    when
        $glpEntities: List(!isEmpty) from collect(Entity(type == "glp_study"))
    then
        componentCreationService.create("GLPStudy.0.0", "GLP_Study", "Yes", "Yes if present, No if not", $glpEntities);
end
// GLPStudy.1.0 - creates "GLP_Study" = "No" when no "glp_study" entity exists.
rule "GLPStudy.1.0: GLP Study not found"
    when
        not Entity(type == "glp_study")
    then
        componentCreationService.create("GLPStudy.1.0", "GLP_Study", "No", "Yes if present, No if not");
end
// TestGuideline.0.0 - inserts the static table of (OECD number, year) -> guideline description
// mappings as soon as at least one guideline number entity and one guideline year entity exist.
// `exists` is used so the rule activates once; plain patterns would activate once per
// number/year entity pair and insert the whole table repeatedly.
rule "TestGuideline.0.0: create OECD number and year guideline mappings"
    salience 2
    when
        exists Entity(type == "oecd_guideline_number")
        exists Entity(type == "oecd_guideline_year")
    then
        insert(new GuidelineMapping("425", "2008", "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (03/10/2008)"));
        insert(new GuidelineMapping("425", "2001", "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (17/12/2001)"));
        insert(new GuidelineMapping("402", "2017", "Nº 402: Acute Dermal Toxicity (09/10/2017)"));
        insert(new GuidelineMapping("402", "1987", "Nº 402: Acute Dermal Toxicity (24/02/1987)"));
        insert(new GuidelineMapping("403", "2009", "Nº 403: Acute Inhalation Toxicity (08/09/2009)"));
        insert(new GuidelineMapping("403", "1981", "Nº 403: Acute Inhalation Toxicity (12/05/1981)"));
        insert(new GuidelineMapping("433", "2018", "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (27/06/2018)"));
        insert(new GuidelineMapping("433", "2017", "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (09/10/2017)"));
        insert(new GuidelineMapping("436", "2009", "Nº 436: Acute Inhalation Toxicity Acute Toxic Class Method (08/09/2009)"));
        insert(new GuidelineMapping("404", "1981", "Nº 404: Acute Dermal Irritation/Corrosion (12/05/1981)"));
        insert(new GuidelineMapping("404", "1992", "Nº 404: Acute Dermal Irritation/Corrosion (17/07/1992)"));
        insert(new GuidelineMapping("404", "2002", "Nº 404: Acute Dermal Irritation/Corrosion (24/04/2002)"));
        insert(new GuidelineMapping("404", "2015", "Nº 404: Acute Dermal Irritation/Corrosion (28/07/2015)"));
        insert(new GuidelineMapping("405", "2017", "Nº 405: Acute Eye Irritation/Corrosion (09/10/2017)"));
        insert(new GuidelineMapping("405", "2012", "Nº 405: Acute Eye Irritation/Corrosion (02/10/2012)"));
        insert(new GuidelineMapping("405", "2002", "Nº 405: Acute Eye Irritation/Corrosion (24/04/2002)"));
        insert(new GuidelineMapping("405", "1987", "Nº 405: Acute Eye Irritation/Corrosion (24/02/1987)"));
        insert(new GuidelineMapping("429", "2002", "Nº 429: Skin Sensitisation: Local Lymph Node Assay (24/04/2002)"));
        insert(new GuidelineMapping("429", "2010", "Nº 429: Skin Sensitisation (23/07/2010)"));
        insert(new GuidelineMapping("442A", "2018", "Nº 442A: Skin Sensitization (23/07/2018)"));
        insert(new GuidelineMapping("442B", "2018", "Nº 442B: Skin Sensitization (27/06/2018)"));
        insert(new GuidelineMapping("471", "1997", "Nº 471: Bacterial Reverse Mutation Test (21/07/1997)"));
        insert(new GuidelineMapping("471", "2020", "Nº 471: Bacterial Reverse Mutation Test (26/06/2020)"));
        insert(new GuidelineMapping("406", "1992", "Nº 406: Skin Sensitisation (1992)"));
        insert(new GuidelineMapping("428", "2004", "Nº 428: Split-Thickness Skin test (2004)"));
        insert(new GuidelineMapping("438", "2018", "Nº 438: Eye Irritation (26/06/2018)"));
        insert(new GuidelineMapping("439", "2019", "Nº 439: Skin Irritation (2019)"));
        insert(new GuidelineMapping("474", "2016", "Nº 474: Micronucleus Bone Marrow Cells Rat (2016)"));
        insert(new GuidelineMapping("487", "2016", "Nº 487: Micronucleus Human Lymphocytes (2016)"));
end
// TestGuideline.0.1 - while no "Test_Guidelines_1" component exists, looks for a GuidelineMapping
// whose number and year equal the values of a guideline number entity and a guideline year
// entity, and creates the component from the mapped guideline text.
// The rule id passed to create() now matches this rule's own name (was "TestGuideline.0.0").
rule "TestGuideline.0.1: match OECD number and year with guideline mappings"
    salience 1
    when
        not Component(name == "Test_Guidelines_1")
        GuidelineMapping($year: year, $number: number, $guideline: guideline)
        $guidelineNumber: Entity(type == "oecd_guideline_number", value == $number)
        $guidelineYear: Entity(type == "oecd_guideline_year", value == $year)
    then
        componentCreationService.create(
            "TestGuideline.0.1",
            "Test_Guidelines_1",
            $guideline,
            "OECD Number and guideline year mapped!",
            List.of($guidelineNumber, $guidelineYear)
        );
end
// TestGuideline.1.0 - fallback: while no "Test_Guidelines_1" component exists, creates it from
// the raw value of an "oecd_guideline" entity.
// The rule id passed to create() now matches this rule's own name; it was "TestGuideline.2.0",
// which is the id of the EPA/EC guideline rule.
rule "TestGuideline.1.0: no guideline mapping found"
    when
        not Component(name == "Test_Guidelines_1")
        $guideLine: Entity(type == "oecd_guideline")
    then
        componentCreationService.create("TestGuideline.1.0", "Test_Guidelines_1", $guideLine.getValue(), "No Mapping for OECD number and year found, using fallback instead!", List.of($guideLine));
end
// TestGuideline.2.0 - collects all "epa_guideline" and "ec_guideline" entities and joins them
// into component "Test_Guidelines_2".
rule "TestGuideline.2.0: All values of EPA guideline and EC guidelines"
    when
        $epaAndEcGuidelines: List() from collect (Entity(type == "epa_guideline" || type == "ec_guideline"))
    then
        componentCreationService.joining("TestGuideline.2.0", "Test_Guidelines_2", $epaAndEcGuidelines);
end
// StartDate.0.0 - hands all "experimental_start_date" entities to convertDates for component
// "Experimental_Starting_Date".
rule "StartDate.0.0: All experimental start dates converted to dd/MM/yyyy"
    when
        $experimentStarts: List() from collect (Entity(type == "experimental_start_date"))
    then
        componentCreationService.convertDates("StartDate.0.0", "Experimental_Starting_Date", $experimentStarts);
end
// CompletionDate.0.0 - hands all "experimental_end_date" entities to convertDates for component
// "Experimental_Completion_Date".
rule "CompletionDate.0.0: All experimental end dates converted to dd/MM/yyyy"
    when
        $experimentEnds: List() from collect (Entity(type == "experimental_end_date"))
    then
        componentCreationService.convertDates("CompletionDate.0.0", "Experimental_Completion_Date", $experimentEnds);
end
// AnalysisCertificate.0.0 - hands all "batch_number" entities to joiningUnique for component
// "Certificate_of_Analysis_Batch_Identification".
rule "AnalysisCertificate.0.0: Unique values of certificate of analysis batch identification"
    when
        $batchIds: List() from collect (Entity(type == "batch_number"))
    then
        componentCreationService.joiningUnique("AnalysisCertificate.0.0", "Certificate_of_Analysis_Batch_Identification", $batchIds);
end
// StudyConclusion.0.0 - for OECD numbers 402/403/404/405/425/429/436/471: hands all
// "study_conclusion" entities to joiningFromFirstSectionOnly with " " as separator argument.
rule "StudyConclusion.0.0: Study conclusion in first found section"
    when
        $supportedNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $conclusionEntities: List() from collect(Entity(type == "study_conclusion"))
    then
        componentCreationService.joiningFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $conclusionEntities, " ");
end
// GuidelineDeviation.0.0 - for OECD numbers 402/403/404/405/425/429/436/471: joins all
// "guideline_deviation" entities with "\n" into "Deviation_from_the_Guideline".
rule "GuidelineDeviation.0.0: Guideline deviation as sentences"
    when
        $supportedNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $deviations: List() from collect (Entity(type == "guideline_deviation"))
    then
        componentCreationService.joining("GuidelineDeviation.0.0", "Deviation_from_the_Guideline", $deviations, "\n");
end
// Species.0.0 - for OECD numbers 402/403/404/405/425/429/436/471: passes all "species" entities
// to firstOrElse with "" as the fallback value for component "Species".
rule "Species.0.0: First found species"
    when
        $supportedNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $speciesEntities: List() from collect (Entity(type == "species"))
    then
        componentCreationService.firstOrElse("Species.0.0", "Species", $speciesEntities, "");
end
// Strain.0.0 - for OECD numbers 402/403/404/405/425/429/436/471: passes all "strain" entities to
// firstOrElse with "" as the fallback value for component "Strain".
rule "Strain.0.0: First found strain"
    when
        $supportedNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $strainEntities: List() from collect (Entity(type == "strain"))
    then
        componentCreationService.firstOrElse("Strain.0.0", "Strain", $strainEntities, "");
end
// Conclusion.0.0 - for OECD numbers 402/403/425/436: hands all "ld50_value" entities to
// joiningUnique for component "Conclusion_LD50_mg_per_kg".
rule "Conclusion.0.0: Unique values of Conclusion LD50"
    when
        $supportedNumber: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $ld50Values: List() from collect (Entity(type == "ld50_value"))
    then
        componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $ld50Values);
end
// Conclusion.1.0 - for OECD numbers 402/403/425/436: creates "Conclusion_LD50_Greater_than" =
// "Greater than" when at least one "ld50_greater" entity exists.
// NOTE(review): the rule name reads "Conclusion0.1.0" while the id passed to create() and the
// sibling rules use "Conclusion.1.0" - looks like a typo; confirm nothing references the name
// before renaming.
rule "Conclusion0.1.0: Greater than found"
    when
        $supportedNumber: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $greaterThanEntities: List(!isEmpty()) from collect (Entity(type == "ld50_greater"))
    then
        componentCreationService.create("Conclusion.1.0", "Conclusion_LD50_Greater_than", "Greater than", "Entity of type 'ld50_greater' found", $greaterThanEntities);
end
// Conclusion.1.1 - for OECD numbers 402/403/425/436: creates an empty
// "Conclusion_LD50_Greater_than" component when no "ld50_greater" entity exists.
rule "Conclusion.1.1: Greater than not found"
    when
        $supportedNumber: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        not Entity(type == "ld50_greater")
    then
        componentCreationService.create("Conclusion.1.1", "Conclusion_LD50_Greater_than", "", "No entity of type 'ld50_greater' found");
end
// Conclusion.2.0 - for OECD numbers 402/403/425/436: hands all "confidence_minimal" entities to
// joiningUnique for component "Conclusion_Minimum_Confidence".
rule "Conclusion.2.0: Minimum confidence as unique values"
    when
        $supportedNumber: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $minConfidences: List() from collect (Entity(type == "confidence_minimal"))
    then
        componentCreationService.joiningUnique("Conclusion.2.0", "Conclusion_Minimum_Confidence", $minConfidences);
end
// Conclusion.3.0 - for OECD numbers 402/403/425/436: hands all "confidence_maximal" entities to
// joiningUnique for component "Conclusion_Maximum_Confidence".
rule "Conclusion.3.0: Maximum confidence as unique values"
    when
        $supportedNumber: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $maxConfidences: List() from collect (Entity(type == "confidence_maximal"))
    then
        componentCreationService.joiningUnique("Conclusion.3.0", "Conclusion_Maximum_Confidence", $maxConfidences);
end
// Necropsy.0.0 - for OECD number 402: hands all "necropsy_findings" entities to
// joiningFromLongestSectionOnly with " " as separator argument.
rule "Necropsy.0.0: Necropsy findings from longest section"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $necropsyFindings: List() from collect (Entity(type == "necropsy_findings"))
    then
        componentCreationService.joiningFromLongestSectionOnly("Necropsy.0.0", "Necropsy_Findings", $necropsyFindings, " ");
end
// Necropsy.0.1 - for OECD number 403 or 436: joins all "necropsy_findings" entities with "\n"
// into component "Necropsy_Findings".
// The rule id passed to joining() now matches this rule's own name; it was "Necropsy.0.0",
// which is the id of the OECD 402 rule.
rule "Necropsy.0.1: Necropsy findings joined with \n"
    when
        FileAttribute(label == "OECD Number", value == "403" || value == "436")
        $necropsies: List() from collect (Entity(type == "necropsy_findings"))
    then
        componentCreationService.joining("Necropsy.0.1", "Necropsy_Findings", $necropsies, "\n");
end
// Necropsy.1.0 - for OECD number 402: joins all "doses_(mg_kg_bw)" entities with " " into
// component "Doses_mg_per_kg_bw".
rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $doseEntities: List() from collect (Entity(type == "doses_(mg_kg_bw)"))
    then
        componentCreationService.joining("Necropsy.1.0", "Doses_mg_per_kg_bw", $doseEntities, " ");
end
// Necropsy.2.0 - for OECD numbers 403/436: joins all "4h_exposure" entities with " " into
// component "Conducted_with_4_Hours_of_Exposure".
// The rule id passed to joining() now matches this rule's own name (was "Necropsy.3.0").
rule "Necropsy.2.0: Conducted with 4 hours of exposure as one block"
    when
        $oecdNumber: String() from List.of("403", "436")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $exposures: List() from collect (Entity(type == "4h_exposure"))
    then
        componentCreationService.joining("Necropsy.2.0", "Conducted_with_4_Hours_of_Exposure", $exposures, " ");
end
// StudyDesign.0.0 - for OECD numbers 404/405/429/406/428/438/439/474/487: joins all
// "study_design" entities with " " into component "Study_Design".
rule "StudyDesign.0.0: Study design as one block"
    when
        $supportedNumber: String() from List.of("404", "405", "429", "406", "428", "438", "439", "474", "487")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $designEntities: List() from collect (Entity(type == "study_design"))
    then
        componentCreationService.joining("StudyDesign.0.0", "Study_Design", $designEntities, " ");
end
// Results.0.0 - for OECD numbers 406/428/438/439/474/487: joins all "results_and_conclusion"
// entities with " " into component "Results_and_Conclusions".
rule "Results.0.0: Results and conclusions as joined values"
    when
        $supportedNumber: String() from List.of("406", "428", "438", "439", "474", "487")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $resultEntities: List() from collect (Entity(type == "results_and_conclusion"))
    then
        componentCreationService.joining("Results.0.0", "Results_and_Conclusions", $resultEntities, " ");
end
// WeightBehavior.0.0 - for OECD number 402: joins all "weight_behavior_changes" entities with
// "\n" into component "Weight_Behavior_Changes".
rule "WeightBehavior.0.0: Weight change behavior as sentences"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $behaviorChanges: List() from collect (Entity(type == "weight_behavior_changes"))
    then
        componentCreationService.joining("WeightBehavior.0.0", "Weight_Behavior_Changes", $behaviorChanges, "\n");
end
// MortalityStatement.0.0 - for OECD number 402: joins all "mortality_statement" entities with
// " " into component "Mortality_Statement".
rule "MortalityStatement.0.0: Mortality statements as one block"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $statements: List() from collect (Entity(type == "mortality_statement"))
    then
        componentCreationService.joining("MortalityStatement.0.0", "Mortality_Statement", $statements, " ");
end
// ClinicalObservations.0.0 - for OECD number 403: joins all "clinical_observations" entities
// with "\n" into component "Clinical_Observations".
// The rule id passed to joining() now matches this rule's own name; it was the copy-pasted
// "MortalityStatement.0.0".
rule "ClinicalObservations.0.0: Clinical observations as sentences"
    when
        FileAttribute(label == "OECD Number", value == "403")
        $observations: List() from collect (Entity(type == "clinical_observations"))
    then
        componentCreationService.joining("ClinicalObservations.0.0", "Clinical_Observations", $observations, "\n");
end
// BodyWeight.0.0 - for OECD number 403: joins all "bodyweight_changes" entities with "\n" into
// component "Body_Weight_Changes".
rule "BodyWeight.0.0: Bodyweight changes as sentences"
    when
        FileAttribute(label == "OECD Number", value == "403")
        $bodyWeightChanges: List() from collect (Entity(type == "bodyweight_changes"))
    then
        componentCreationService.joining("BodyWeight.0.0", "Body_Weight_Changes", $bodyWeightChanges, "\n");
end
// Detailing.0.0 - for OECD numbers 404/405: joins all "detailing" entities with " " into
// component "Detailing_of_Reported_Changes".
rule "Detailing.0.0: Detailing of reported changes as one block"
    when
        $supportedNumber: String() from List.of("404", "405")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $detailEntities: List() from collect (Entity(type == "detailing"))
    then
        componentCreationService.joining("Detailing.0.0", "Detailing_of_Reported_Changes", $detailEntities, " ");
end
// Sex.0.0 - for OECD numbers 405/429: creates component "Sex" = "male" when at least one "sex"
// entity has the lower-cased value "male" or "males".
rule "Sex.0.0: Male sex found"
    when
        $supportedNumber: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $maleEntities: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "male" || value.toLowerCase() == "males")))
    then
        componentCreationService.create("Sex.0.0", "Sex", "male", "male sex found", $maleEntities);
end
// Sex.1.0 - for OECD numbers 405/429: creates component "Sex" = "female" when at least one
// "sex" entity has the lower-cased value "female" or "females".
// The rule id passed to create() now matches this rule's own name; it was "Sex.0.0", which is
// the id of the male rule.
rule "Sex.1.0: Female sex found"
    when
        $oecdNumber: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $females: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "female" || value.toLowerCase() == "females")))
    then
        componentCreationService.create("Sex.1.0", "Sex", "female", "female sex found", $females);
end
// NumberOfAnimals.0.0 - for OECD numbers 405/429: creates "Number_of_Animals" from the value of
// each "number_of_animals" entity (one activation per entity).
rule "NumberOfAnimals.0.0: Number of animals found"
    when
        $supportedNumber: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $animalCount: Entity(type == "number_of_animals")
    then
        componentCreationService.create("NumberOfAnimals.0.0", "Number_of_Animals", $animalCount.getValue(), "Number of animals found directly", $animalCount);
end
// NumberOfAnimals.1.0 - for OECD numbers 405/429, when no "number_of_animals" entity exists:
// hands all "animal_number" entities to uniqueValueCount for component "Number_of_Animals".
rule "NumberOfAnimals.1.0: Count unique occurences of animals"
    when
        $supportedNumber: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        not Entity(type == "number_of_animals")
        $animalNumbers: List() from collect (Entity(type == "animal_number"))
    then
        componentCreationService.uniqueValueCount("NumberOfAnimals.1.0", "Number_of_Animals", $animalNumbers);
end
// ClinicalSigns.0.0 - for OECD number 425: joins all "clinical_signs" entities with "\n" into
// component "Clinical_Signs".
rule "ClinicalSigns.0.0: Clinical signs as sentences"
    when
        $supportedNumber: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $signEntities: List() from collect (Entity(type == "clinical_signs"))
    then
        componentCreationService.joining("ClinicalSigns.0.0", "Clinical_Signs", $signEntities, "\n");
end
// DoseMortality.0.0 - for OECD number 425: hands all "dose_mortality" and "dose_mortality_dose"
// entities to joiningFromSameTableRow for component "Dose_Mortality".
rule "DoseMortality.0.0: Dose mortality joined with dose from same table row"
    when
        $supportedNumber: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $mortalityAndDoseEntities: List() from collect (Entity(type == "dose_mortality" || type == "dose_mortality_dose"))
    then
        componentCreationService.joiningFromSameTableRow("DoseMortality.0.0", "Dose_Mortality", $mortalityAndDoseEntities);
end
// Mortality.0.0 - for OECD number 425: joins all "mortality" entities with " " into component
// "Mortality".
rule "Mortality.0.0: Mortality as one block"
    when
        $supportedNumber: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $mortalityEntities: List() from collect (Entity(type == "mortality"))
    then
        componentCreationService.joining("Mortality.0.0", "Mortality", $mortalityEntities, " ");
end
// Dosages.0.0 - for OECD number 425: passes all "dosages" entities to firstOrElse with "" as
// the fallback value for component "Dosages".
rule "Dosages.0.0: First found value of Dosages"
    when
        $supportedNumber: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $dosageEntities: List() from collect (Entity(type == "dosages"))
    then
        componentCreationService.firstOrElse("Dosages.0.0", "Dosages", $dosageEntities, "");
end
// PrelimResults.0.0 - for OECD number 429: joins all "preliminary_test_results" entities with
// "\n" into component "Preliminary_Test_Results".
rule "PrelimResults.0.0: Preliminary test results as sentences"
    when
        $supportedNumber: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $preliminaryResults: List() from collect (Entity(type == "preliminary_test_results"))
    then
        componentCreationService.joining("PrelimResults.0.0", "Preliminary_Test_Results", $preliminaryResults, "\n");
end
// TestResults.0.0 - for OECD number 429: joins all "test_results" entities with " " into
// component "Test_Results".
rule "TestResults.0.0: Test results as one block"
    when
        $supportedNumber: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $testResultEntities: List() from collect (Entity(type == "test_results"))
    then
        componentCreationService.joining("TestResults.0.0", "Test_Results", $testResultEntities, " ");
end
// PositiveControl.0.0 - for OECD number 429: joins all "positive_control" entities with " "
// into component "Was_the_definitive_study_conducted_with_positive_control".
rule "PositiveControl.0.0: Was the definitive study conducted with positive control"
    when
        $supportedNumber: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $positiveControls: List() from collect (Entity(type == "positive_control"))
    then
        componentCreationService.joining("PositiveControl.0.0", "Was_the_definitive_study_conducted_with_positive_control", $positiveControls, " ");
end
// MainResults.0.0 - for OECD number 429: joins all "results_(main_study)" entities with " "
// into component "Results_Main_Study".
rule "MainResults.0.0: Results from main study as one block"
    when
        $supportedNumber: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $mainStudyResults: List() from collect (Entity(type == "results_(main_study)"))
    then
        componentCreationService.joining("MainResults.0.0", "Results_Main_Study", $mainStudyResults, " ");
end
// UsedApproach.0.0 - for OECD number 429: creates "What_was_the_approach_used" = "Group" when at
// least one "approach_used" entity exists.
rule "UsedApproach.0.0: Used approach found and mapped to 'Group'"
    when
        $supportedNumber: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        $approachEntities: List(!isEmpty()) from collect (Entity(type == "approach_used"))
    then
        componentCreationService.create("UsedApproach.0.0", "What_was_the_approach_used", "Group", "'Group' when approach used is present, else 'Individual'", $approachEntities);
end
// UsedApproach.1.0 - for OECD number 429: creates "What_was_the_approach_used" = "Individual"
// when no "approach_used" entity exists.
rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
    when
        $supportedNumber: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedNumber)
        not Entity(type == "approach_used")
    then
        componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
end
/*
rule "DefaultComponents.999.0: Create components for all unmapped entities."
salience -999
when
$allEntities: List(!isEmpty()) from collect (Entity())
then
componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $allEntities);
end
*/
//------------------------------------ Component merging rules ------------------------------------
/*
rule "X.0.0: merge duplicate component references"
when
$first: Component()
$duplicate: Component(this != $first, name == $first.name, value == $first.value)
then
$first.getReferences().addAll($duplicate.getReferences());
retract($duplicate);
end
*/