diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java
index c5e75340..ce93415f 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java
@@ -425,6 +425,71 @@ public interface SemanticNode {
     }
 
 
+    /**
+     * Checks whether this SemanticNode contains exactly the provided String as a word.
+     * @param word - String which the TextBlock might contain
+     * @return true, if any word of this node's TextBlock equals the provided String
+     */
+    default boolean containsWord(String word) {
+
+        return getTextBlock().getWords().stream().anyMatch(s -> s.equals(word));
+    }
+
+
+    /**
+     * Checks whether this SemanticNode contains the provided String as a word ignoring case; both sides are lower-cased with Locale.ENGLISH so the result does not depend on the default locale.
+     * @param word - String which the TextBlock might contain
+     * @return true, if any word of this node's TextBlock equals the provided String ignoring case
+     */
+    default boolean containsWordIgnoreCase(String word) {
+
+        return getTextBlock().getWords().stream().map(s -> s.toLowerCase(Locale.ENGLISH)).anyMatch(word.toLowerCase(Locale.ENGLISH)::equals);
+    }
+
+
+    /**
+     * Checks whether this SemanticNode contains any of the provided Strings as a word.
+     * @param words - the Strings which the TextBlock might contain
+     * @return true, if this node's TextBlock contains any of the provided strings
+     */
+    default boolean containsAnyWord(String... words) {
+
+        return Arrays.stream(words).anyMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
+    }
+
+
+    /**
+     * Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case.
+     * @param words - the Strings which the TextBlock might contain
+     * @return true, if this node's TextBlock contains any of the provided strings
+     */
+    default boolean containsAnyWordIgnoreCase(String... words) {
+
+        return Arrays.stream(words).map(word -> word.toLowerCase(Locale.ENGLISH)).anyMatch(word -> getTextBlock().getWords().stream().map(s -> s.toLowerCase(Locale.ENGLISH)).anyMatch(word::equals));
+    }
+
+
+    /**
+     * Checks whether this SemanticNode contains all the provided Strings as word.
+     * @param words - the Strings which the TextBlock might contain
+     * @return true, if this node's TextBlock contains all the provided strings
+     */
+    default boolean containsAllWords(String... words) {
+
+        return Arrays.stream(words).allMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
+    }
+
+
+    /**
+     * Checks whether this SemanticNode contains all the provided Strings as word ignoring case; every requested word is lower-cased with Locale.ENGLISH and looked up among the equally lower-cased words of the TextBlock.
+     * @param words - the Strings which the TextBlock might contain
+     * @return true, if this node's TextBlock contains all the provided strings (trivially true when none are given)
+     */
+    default boolean containsAllWordsIgnoreCase(String... words) {
+
+        return Arrays.stream(words).map(word -> word.toLowerCase(Locale.ENGLISH)).allMatch(word -> getTextBlock().getWords().stream().map(s -> s.toLowerCase(Locale.ENGLISH)).anyMatch(word::equals));
+    }
+
     /**
      * Checks whether this SemanticNode matches the provided regex pattern.
*
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java
index 49d707ed..e56c5f8a 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java
@@ -3,11 +3,14 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock;
 import static java.lang.String.format;
 
 import java.awt.geom.Rectangle2D;
+import java.text.BreakIterator;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -40,6 +43,7 @@ public class AtomicTextBlock implements TextBlock {
     //string coordinates
     TextRange textRange;
     String searchText;
+    List<String> words;
     List lineBreaks;
 
     //position coordinates
@@ -114,6 +118,32 @@ public class AtomicTextBlock implements TextBlock {
         return new TextRange(lineBreaks.get(lineNumber - 1) + textRange.start(), lineBreaks.get(lineNumber) + textRange.start());
     }
 
+    /**
+     * Splits the search text at English word boundaries on first use and caches the result.
+     *
+     * @return unmodifiable list of the non-blank segments of the search text, in order of appearance
+     */
+    @Override
+    public List<String> getWords() {
+
+        if (words == null) {
+            List<String> tokens = new ArrayList<>();
+            BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
+            iterator.setText(searchText);
+            int start = iterator.first();
+            for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
+                String token = searchText.substring(start, end);
+                // the word iterator also reports the whitespace runs between words; those are not words
+                if (!token.isBlank()) {
+                    tokens.add(token);
+                }
+            }
+            // assign the cache only once the list is complete, so a half-filled list is never visible
+            words = Collections.unmodifiableList(tokens);
+        }
+        return words;
+    }
+
     @Override
     public List getAtomicTextBlocks() {
 
diff --git
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java index d180c5c9..57334d5f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java @@ -7,6 +7,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; +import java.util.Collection; import java.util.Map; import java.util.stream.Stream; @@ -86,6 +87,13 @@ public class ConcatenatedTextBlock implements TextBlock { } + @Override + public List getWords() { + + return atomicTextBlocks.stream().map(AtomicTextBlock::getWords).flatMap(Collection::stream).toList(); + } + + @Override public int numberOfLines() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java index 50e76203..57303ee1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java @@ -18,6 +18,7 @@ public interface TextBlock extends CharSequence { String getSearchText(); + List getWords(); List getAtomicTextBlocks(); diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index f35f4ba8..bdfa11d5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -58,6 +58,8 @@ public class EntityCreationService { public Stream betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startTextRanges = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock()); List stopTextRanges = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock()); @@ -67,6 +69,8 @@ public class EntityCreationService { public Stream betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock()); List stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock()); @@ -76,6 +80,8 @@ public class EntityCreationService { public Stream betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startBoundaries = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock()); List stopBoundaries = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock()); @@ -90,6 +96,8 @@ public class EntityCreationService { public Stream 
betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock()); List stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock()); @@ -104,6 +112,8 @@ public class EntityCreationService { public Stream betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startBoundaries = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock()); List stopBoundaries = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock()); @@ -118,6 +128,8 @@ public class EntityCreationService { public Stream betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock()); List stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock()); @@ -132,6 +144,8 @@ public class EntityCreationService { public Stream betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startBoundaries = RedactionSearchUtility.findTextRangesByString(start, node.getTextBlock()); List stopBoundaries = RedactionSearchUtility.findTextRangesByString(stop, node.getTextBlock()); @@ -150,6 +164,8 @@ public class EntityCreationService { public Stream betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(start, stop); + List startBoundaries = 
RedactionSearchUtility.findTextRangesByStringIgnoreCase(start, node.getTextBlock()); List stopBoundaries = RedactionSearchUtility.findTextRangesByStringIgnoreCase(stop, node.getTextBlock()); @@ -168,6 +184,8 @@ public class EntityCreationService { public Stream shortestBetweenAnyString(List starts, List stops, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(starts, stops); + List startTextRanges = RedactionSearchUtility.findTextRangesByList(starts, node.getTextBlock()); List stopTextRanges = RedactionSearchUtility.findTextRangesByList(stops, node.getTextBlock()); @@ -177,6 +195,8 @@ public class EntityCreationService { public Stream shortestBetweenAnyStringIgnoreCase(List starts, List stops, String type, EntityType entityType, SemanticNode node) { + checkIfBothStartAndEndAreEmpty(starts, stops); + List startTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(starts, node.getTextBlock()); List stopTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(stops, node.getTextBlock()); @@ -185,6 +205,8 @@ public class EntityCreationService { public Stream shortestBetweenAnyStringIgnoreCase(List starts, List stops, String type, EntityType entityType, SemanticNode node, int limit) { + checkIfBothStartAndEndAreEmpty(starts, stops); + List startTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(starts, node.getTextBlock()); List stopTextRanges = RedactionSearchUtility.findTextRangesByListIgnoreCase(stops, node.getTextBlock()); @@ -219,9 +241,6 @@ public class EntityCreationService { public Stream betweenTextRanges(List startBoundaries, List stopBoundaries, String type, EntityType entityType, SemanticNode node, int limit) { - if (startBoundaries.isEmpty() || stopBoundaries.isEmpty()) { - return Stream.empty(); - } List entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries); return entityBoundaries.stream() .filter(range -> (limit 
== 0 || range.length() <= limit))
@@ -263,6 +282,35 @@ public class EntityCreationService {
     }
 
 
+    /**
+     * Guards the between-strings searches against a call without any delimiter.
+     *
+     * @param start text that opens the searched range
+     * @param end text that closes the searched range
+     * @throws IllegalArgumentException if both delimiters are null or empty
+     */
+    private void checkIfBothStartAndEndAreEmpty(String start, String end) {
+        // List.of(start) is never empty, even for "", and throws on null, so the strings are tested directly
+        if ((start == null || start.isEmpty()) && (end == null || end.isEmpty())) {
+            throw new IllegalArgumentException("Start and end values are empty!");
+        }
+    }
+
+
+    /**
+     * Guards the between-any-string searches against a call without any delimiter.
+     *
+     * @param start candidate texts that open the searched range
+     * @param end candidate texts that close the searched range
+     * @throws IllegalArgumentException if both lists are null or empty
+     */
+    private void checkIfBothStartAndEndAreEmpty(List<String> start, List<String> end) {
+        if ((start == null || start.isEmpty()) && (end == null || end.isEmpty())) {
+            throw new IllegalArgumentException("Start and end values are empty!");
+        }
+    }
+
+
     public Stream bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
 
         return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java
new file mode 100644
index 00000000..87c73e96
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java
@@ -0,0 +1,125 @@
+package com.iqser.red.service.redaction.v1.server;
+
+import static org.mockito.Mockito.when;
+
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.List;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.Configuration;
+import
org.springframework.context.annotation.FilterType; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import com.iqser.red.commons.jackson.ObjectMapperFactory; +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; +import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; +import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; +import com.iqser.red.storage.commons.StorageAutoConfiguration; +import com.iqser.red.storage.commons.service.StorageService; +import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService; +import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; +import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration; +import com.knecon.fforesight.tenantcommons.TenantContext; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"}) +@Import(AnalysisTest.RedactionIntegrationTestConfiguration.class) +public class AnalysisTest extends AbstractRedactionIntegrationTest { + + private static final String RULES = loadFromClassPath("drools/table_demo.drl"); + private static final String COMPONENT_RULES = loadFromClassPath("drools/table_demo_components.drl"); + + + @Test + @Disabled + public void analyzeTableDemoFile() { + 
+ AnalyzeRequest request = uploadFileToStorage("files/TableDemo/Table_examples.pdf"); + + System.out.println("Start Full integration test"); + analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request); + System.out.println("Finished structure analysis"); + AnalyzeResult result = analyzeService.analyze(request); + System.out.println("Finished analysis"); +// var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); +// var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID); + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build()); + + String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf"; + + try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) { + fileOutputStream.write(annotateResponse.getDocument()); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + + + @Configuration + @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class}) + @Import(LayoutParsingServiceProcessorConfiguration.class) + @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)}) + static class RedactionIntegrationTestConfiguration { + + @Bean + @Primary + public StorageService inmemoryStorage() { + + return new FileSystemBackedStorageService(ObjectMapperFactory.create()); + } + + } + + + @BeforeEach + public void stubClients() { + + TenantContext.setTenantId("documine"); + + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis()); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES)); + when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(System.currentTimeMillis()); + when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, 
RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(COMPONENT_RULES)); + + loadDictionaryForTest(); + loadTypeForTest(); + loadNerForTest(); + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse()); + + when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L); + when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder() + .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID) + .type(DOSSIER_REDACTIONS_INDICATOR) + .dossierTemplateId(TEST_DOSSIER_ID) + .hexColor("#ffe187") + .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR)) + .build())); + + mockDictionaryCalls(null); + + when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors); + } + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index c65a8059..9fb37ab0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -1099,7 +1099,7 @@ rule "ETC.2.1: Redact signatures (vertebrate study)" // Rule unit: ETC.3 -rule "ETC.3.0: Redact logos (vertebrate study)" +rule "ETC.3.0: Redact logos (non vertebrate study)" when not FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) @@ -1107,7 +1107,7 @@ rule "ETC.3.0: Redact logos (vertebrate study)" $logo.redact("ETC.3.0", 
"Logo Found", "Article 39(e)(3) of Regulation (EC) No 178/2002"); end -rule "ETC.3.1: Redact logos (non vertebrate study)" +rule "ETC.3.1: Redact logos (vertebrate study)" when FileAttribute(label == "Vertebrate Study", value == "Yes") $logo: Image(imageType == ImageType.LOGO) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl new file mode 100644 index 00000000..69015695 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl @@ -0,0 +1,448 @@ +package drools + +import static java.lang.String.format; +import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch; +import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch; + +import java.util.List; +import java.util.LinkedList; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; +import java.util.Optional; + +import com.iqser.red.service.redaction.v1.server.model.document.*; +import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.*; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity +import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule +import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; +import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; +import com.iqser.red.service.redaction.v1.server.model.NerEntities; +import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; +import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; +import 
com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus + +global Document document +global EntityCreationService entityCreationService +global ManualChangesApplicationService manualChangesApplicationService +global Dictionary dictionary + +//------------------------------------ queries ------------------------------------ + +query "getFileAttributes" + $fileAttribute: FileAttribute() + end +//--------------------------------------------------------------------------- + +rule "TAB.0.0: Study Type File Attribute" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") + && containsAnyString("OECD", "EPA", "OPPTS")) + then + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1 ,$section.getTextBlock()).stream() + .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .forEach(fileAttribute -> insert(fileAttribute)); + 
RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()).stream() + .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .forEach(fileAttribute -> insert(fileAttribute)); + end + +rule "TAB.0.1: Guidelines" + when + $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS")) + then + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no. found")); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY, 1, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline number found")); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY, 2, $section) + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); + end + +rule "TAB.1.0: Full Table extraction (Guideline Deviation)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: 
Section(getHeadline().containsString("Full Table")) + $table: Table() from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() + $tableCell: TableCell(!header) from $table.streamTableCells().toList() + then + entityCreationService.bySemanticNode($tableCell, "guideline_deviation", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.1.0", "full table extracted")); + end + +rule "TAB.2.0: Individual row extraction (Clinical Signs)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Individual Rows Extraction")) + $table: Table(hasHeader("Animal No."), (hasRowWithHeaderAndAnyValue("Animal No.", List.of("120-2", "120-5")))) from $section.streamChildren().toList() + TableCell($row: row, containsAnyString("120-2", "120-5")) from $table.streamTableCellsWithHeader("Animal No.").toList() + $tableCell: TableCell($row == row) from $table.streamTableCells().toList() + then + entityCreationService.bySemanticNode($tableCell, "clinical_signs", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.2.0", "Individual row based on animal number")); + end + +rule "TAB.3.0: Individual column extraction (Strain)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Individual Column")) + $table: Table(hasHeader("Sex")) from $section.streamChildren().toList() + then + $table.streamTableCellsWithHeader("Sex") + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dosages", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header")); + end + +rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Combined Columns")) + $table: Table(hasHeader("Sex"), 
hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() + $maleCells: TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList() + $dosageCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList() + then + entityCreationService.bySemanticNode($maleCells, "combined_male_dosage", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male")); + entityCreationService.bySemanticNode($dosageCells, "combined_male_dosage", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male")); + end + +rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Combined Columns")) + $table: Table(hasHeader("Sex"), hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() + $femaleCells: TableCell($row: row, containsAnyWordIgnoreCase("Female")) from $table.streamTableCellsWithHeader("Sex").toList() + $mortalityCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Mortality").toList() + then + entityCreationService.bySemanticNode($femaleCells, "combined_female_mortality", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female")); + entityCreationService.bySemanticNode($mortalityCells, "combined_female_mortality", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female")); + end + +rule "TAB.5.0: Targeted cell extraction" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Value Extraction")) + $table: Table(hasHeader("Mortality"), hasRowWithHeaderAndAnyValue("Sex", List.of("male", "Male")), hasRowWithHeaderAndValue("Mortality", 
"Survived")) from $section.streamChildren().toList() + TableCell(containsWordIgnoreCase("Male"), $row: row) from $table.streamTableCellsWithHeader("Sex").toList() + TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Mortality").toList() + $dosageCell: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList() + then + entityCreationService.bySemanticNode($dosageCell,"doses_mg_kg_bw", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.5.0", "Dosage found in row with survived male")); + end + +rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)" + when + $section: Section(getHeadline().containsString("Advanced Table Extraction"), containsAllStrings("female", "Female", "Survived", "Group 2")) + $table: Table(hasHeader("Group 2")) from $section.streamChildren().toList() + TableCell(containsWordIgnoreCase("Female"), $row: row) from $table.streamTableCellsWithHeader("Group 2").toList() + TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList() + $femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList() + then + entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date")); + end + +rule "TAB.7.0: Indicator (Species)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Entity-Based")) + $table: Table() from $section.streamAllSubNodesOfType(NodeType.TABLE).toList() + TableCell(isHeader(), containsString("Title"), $col: col) from $table.streamTableCells().toList() + TableCell(hasEntitiesOfType("vertebrate"), $row: row) from $table.streamTableCells().toList() + $cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList() + then + 
entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY) + .ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found")); + end + +//------------------------------------ Manual redaction rules ------------------------------------ + +// Rule unit: MAN.0 +rule "MAN.0.0: Apply manual resize redaction" + salience 128 + when + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) + $entityToBeResized: TextEntity(matchesAnnotationId($id)) + then + manualChangesApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + retract($resizeRedaction); + update($entityToBeResized); + $entityToBeResized.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "MAN.0.1: Apply manual resize redaction" + salience 128 + when + $resizeRedaction: ManualResizeRedaction($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate)) + $imageToBeResized: Image(id == $id) + then + manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction); + retract($resizeRedaction); + update($imageToBeResized); + update($imageToBeResized.getParent()); + end + + +// Rule unit: MAN.1 +rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" + salience 128 + when + $idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeRemoved: TextEntity(matchesAnnotationId($id)) + then + $entityToBeRemoved.getManualOverwrite().addChange($idRemoval); + update($entityToBeRemoved); + retract($idRemoval); + $entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node)); + end + +rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" + salience 128 + when + 
$idRemoval: IdRemoval($id: annotationId, status == AnnotationStatus.APPROVED) + $imageEntityToBeRemoved: Image($id == id) + then + $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval); + update($imageEntityToBeRemoved); + retract($idRemoval); + update($imageEntityToBeRemoved.getParent()); + end + + +// Rule unit: MAN.2 +rule "MAN.2.0: Apply force redaction" + salience 128 + when + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToForce: TextEntity(matchesAnnotationId($id)) + then + $entityToForce.getManualOverwrite().addChange($force); + update($entityToForce); + $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); + end + +rule "MAN.2.1: Apply force redaction to images" + salience 128 + when + $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToForce: Image(id == $id) + then + $imageToForce.getManualOverwrite().addChange($force); + update($imageToForce); + update($imageToForce.getParent()); + retract($force); + end + + +// Rule unit: MAN.3 +rule "MAN.3.0: Apply entity recategorization" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type != $type) + then + $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); + manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); + retract($recategorization); + // Entity is copied and inserted, so the old entity needs to be retracted to avoid duplication. 
+ retract($entityToBeRecategorized); + end + +rule "MAN.3.1: Apply entity recategorization of same type" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type == $type) + then + $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); + retract($recategorization); + end + +rule "MAN.3.2: Apply image recategorization" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $imageToBeRecategorized: Image($id == id) + then + manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); + update($imageToBeRecategorized); + update($imageToBeRecategorized.getParent()); + retract($recategorization); + end + + +// Rule unit: MAN.4 +rule "MAN.4.0: Apply legal basis change" + salience 128 + when + $legalbasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.getManualOverwrite().addChange($legalbasisChange); + end + +rule "MAN.4.1: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId, status == AnnotationStatus.APPROVED) + $entityToBeChanged: TextEntity(matchesAnnotationId($id)) + then + $entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); + end + + +//------------------------------------ Entity merging rules ------------------------------------ + +// Rule unit: X.0 +rule "X.0.0: Remove Entity contained by Entity of same type" + salience 65 + when + $larger: TextEntity($type: type, $entityType: entityType, 
active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + + +// Rule unit: X.2 +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges(), active()) + then + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity); + end + + +// Rule unit: X.3 +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" + salience 64 + when + $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + then + $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); + retract($recommendation); + end + + +// Rule unit: X.4 +rule "X.4.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY with same type" + salience 256 + when + $entity: TextEntity($type: type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + then + $entity.addEngines($recommendation.getEngines()); + $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); + retract($recommendation); + end + + +// Rule unit: X.5 
+rule "X.5.0: Remove Entity of type RECOMMENDATION when contained by ENTITY" + salience 256 + when + $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) + then + $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); + retract($recommendation); + end + + +// Rule unit: X.7 +rule "X.7.0: remove all images" + salience 512 + when + $image: Image(imageType != ImageType.OCR, !hasManualChanges()) + then + $image.remove("X.7.0", "remove all images"); + retract($image); + end + + +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 +rule "FA.1.0: Remove duplicate FileAttributes" + salience 64 + when + $fileAttribute: FileAttribute($label: label, $value: value) + $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) + then + retract($duplicate); + end + + +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +rule "LDS.0.0: Run local dictionary search" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -999 + when + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() + then + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue()); + entity.addMatchedRules(matchedRules); + }); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_OLD.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_OLD.drl new file mode 100644 index 
00000000..02dd3b78 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_OLD.drl @@ -0,0 +1,134 @@ +package drools + +import com.iqser.red.service.redaction.v1.server.redaction.model.Section + +global Section section + + +// --------------------------------------- Your rules below this line-------------------------------------------------- + +rule "0a: Study Type File Attribute" + when + Section( + !fileAttributeContainsAnyOf("OECD Number","402","403","404","405","425","429","436","438","439","471","487") + && ( + text.contains("DATA REQUIREMENT") + || text.contains("TEST GUIDELINE") + || text.contains("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + text.contains("OECD") + || text.contains("EPA") + || text.contains("OPPTS") + ) + ) + then + section.addFileAttribute("OECD Number", "(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", true, 1); + section.addFileAttribute("OECD Number", "(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", true, 1); + end + + +rule "1: Guidelines" + when + Section( + ( + text.contains("DATA REQUIREMENT") + || text.contains("TEST GUIDELINE") + || text.contains("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + text.contains("OECD") + || text.contains("EPA") + || text.contains("OPPTS") + ) + ) + then + section.redactByRegEx("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", true, 1, "oecd_guideline_number", 1, "OECD Guideline no. 
found", "n-a"); + section.redactByRegEx("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", true, 2, "oecd_guideline_year", 1, "OECD Guideline year found", "n-a"); + section.redactByRegEx("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", true, 1, "oecd_guideline_year", 1, "OECD Guideline year found", "n-a"); + section.redactByRegEx("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", true, 1, "oecd_guideline_number", 1, "OECD Guideline number found", "n-a"); + section.redactByRegEx("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", true, 2, "oecd_guideline_year", 1, "OECD Guideline year found", "n-a"); + end + + +rule "2: Full Table extraction (Guideline Deviation)" + when + Section( + fileAttributeByLabelEqualsIgnoreCase("OECD Number","425") + && headlineContainsWord("Full Table") + && hasTableHeader("Sex") + ) + then + section.redactSectionTextWithoutHeadLine("guideline_deviation",2,"Full table extraction into guideline deviation","n-a"); + end + +rule "3: Individual row extraction (Clinical Signs)" + when + Section( + fileAttributeByLabelEqualsIgnoreCase("OECD Number","425") + && headlineContainsWord("Individual Rows") + && hasTableHeader("Animal No.") + && (rowEquals("Animal No.","120-2") || rowEquals("Animal No.","120-5")) + ) + then + section.redactSectionTextWithoutHeadLine("clinical_signs",3,"Individual row based on animal number","n-a"); + end + +rule "4: Individual column extraction (Strain)" + when + Section( + fileAttributeByLabelEqualsIgnoreCase("OECD Number","425") + && headlineContainsWord("Individual Column") + && hasTableHeader("Sex") + ) + then + section.redactCell("Sex",4,"dosages",false,"Individual column based on column header","n-a"); + end + +rule "5: Dose Mortality" + when + Section( + fileAttributeByLabelEqualsIgnoreCase("OECD Number","425") + && headlineContainsWord("Combined Columns") + && hasTableHeader("Mortality") + && hasTableHeader("Dosage (mg/kg 
bw)") + ) + then + section.redactCell("Mortality",5,"dose_mortality",false,"Dose Mortality found.","n-a"); + section.redactCell("Dosage (mg/kg bw)",5,"dose_mortality_dose",false,"Dose Mortality dose found.","n-a"); + end + +rule "6: targeted cell extraction (Experimental Start date)" + when + Section( + fileAttributeByLabelEqualsIgnoreCase("OECD Number","425") + && headlineContainsWord("Value Extraction") + && hasTableHeader("Mortality") + && (rowEquals("Sex","male") || rowEquals("Sex","Male")) + && rowEquals("Mortality","Survived") + ) + then + section.redactCell("Treatment start",6,"experimental_start_date",false,"Female deaths date to experimental start date","n-a"); + end + +rule "7: targeted cell extraction (Experimental Stop date)" + when + Section( + isInTable() + && (searchText.contains("female") || searchText.contains("Female")) + && searchText.contains("Survived") + ) + then + section.redactCellBelow(7,"experimental_end_date",true,false,"Female deaths date to experimental start date","n-a", "Sex", "Group 2"); + end + +rule "8: Indicator (Species)" + when + Section( + fileAttributeByLabelEqualsIgnoreCase("OECD Number","425") + && headlineContainsWord("Entity-Based") + && matchesType("vertebrates") + ) + then + section.redactCell("Title",8,"study_design",false,"Vertebrate study found","n-a"); + end \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_components.drl new file mode 100644 index 00000000..f1d18f26 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo_components.drl @@ -0,0 +1,520 @@ +package drools + +import static java.lang.String.format; +import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch; +import static 
com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch; + +import java.util.List; +import java.util.LinkedList; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; +import java.util.Optional; + +import com.iqser.red.service.redaction.v1.server.model.component.Component; +import com.iqser.red.service.redaction.v1.server.model.component.Entity; +import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; + +global ComponentCreationService componentCreationService + +//------------------------------------ queries ------------------------------------ + +query "getFileAttributes" + $fileAttribute: FileAttribute() + end + +query "getComponents" + $component: Component() + end + +//------------------------------------ Guideline mapping object ------------------------------------ + +declare GuidelineMapping + number: String + year: String + guideline: String + end + +//------------------------------------ Default Components rules ------------------------------------ + +rule "StudyTitle.0.0: First Title found" + when + $titleCandidates: List() from collect (Entity(type == "title")) + then + componentCreationService.firstOrElse("StudyTitle.0.0", 
"Study_Title", $titleCandidates, ""); + end + + +rule "PerformingLaboratory.1.0: Performing Laboratory name and country found in same section" + when + $laboratoryName: Entity(type == "laboratory_name", $node: containingNode) + $laboratoryCountry: Entity(type == "laboratory_country", containingNode == $node) + not Entity(type == "laboratory_country", containingNode == $node, Math.abs($laboratoryName.startOffset - startOffset) < Math.abs($laboratoryName.startOffset - $laboratoryCountry.startOffset)) + then + componentCreationService.create("PerformingLaboratory.1.0", "Performing_Laboratory", $laboratoryName.getValue() + ", " + $laboratoryCountry.getValue(), "Laboratory name and country found!", List.of($laboratoryName, $laboratoryCountry)); + end + +rule "PerformingLaboratory.2.0: Performing Laboratory name but no country found in same section" + when + $laboratoryName: Entity(type == "laboratory_name", $node: containingNode) + not Entity(type == "laboratory_country", containingNode == $node) + then + componentCreationService.create("PerformingLaboratory.2.0", "Performing_Laboratory", $laboratoryName.getValue(), "Only laboratory name found!", List.of($laboratoryName)); + end + +rule "PerformingLaboratory.0.2: Performing Laboratory not found" + salience -1 + when + not Component(name == "Performing_Laboratory") + then + componentCreationService.create("PerformingLaboratory.0.2", "Performing_Laboratory", "", "fallback"); + end + + +rule "ReportNumber.0.0: First Report number found" + when + $reportNumberCandidates: List() from collect (Entity(type == "report_number")) + then + componentCreationService.firstOrElse("ReportNumber.0.0", "Report_Number", $reportNumberCandidates, ""); + end + + +rule "GLPStudy.0.0: GLP Study found" + when + $glpStudyList: List(!isEmpty) from collect(Entity(type == "glp_study")) + then + componentCreationService.create("GLPStudy.0.0", "GLP_Study", "Yes", "Yes if present, No if not", $glpStudyList); + end + +rule "GLPStudy.1.0: GLP Study not 
found" + when + not Entity(type == "glp_study") + then + componentCreationService.create("GLPStudy.1.0", "GLP_Study", "No", "Yes if present, No if not"); + end + + +rule "TestGuideline.0.0: create OECD number and year guideline mappings" + salience 2 + when + Entity(type == "oecd_guideline_number") + Entity(type == "oecd_guideline_year") + then + insert(new GuidelineMapping("425", "2008", "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (03/10/2008)")); + insert(new GuidelineMapping("425", "2001", "Nº 425: Acute oral Toxicity - Up-and-Down Procedure (17/12/2001)")); + insert(new GuidelineMapping("402", "2017", "Nº 402: Acute Dermal Toxicity (09/10/2017)")); + insert(new GuidelineMapping("402", "1987", "Nº 402: Acute Dermal Toxicity (24/02/1987)")); + insert(new GuidelineMapping("403", "2009", "Nº 403: Acute Inhalation Toxicity (08/09/2009)")); + insert(new GuidelineMapping("403", "1981", "Nº 403: Acute Inhalation Toxicity (12/05/1981)")); + insert(new GuidelineMapping("433", "2018", "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (27/06/2018)")); + insert(new GuidelineMapping("433", "2017", "Nº 433: Acute Inhalation Toxicity: Fixed Concentration Procedure (09/10/2017)")); + insert(new GuidelineMapping("436", "2009", "Nº 436: Acute Inhalation Toxicity – Acute Toxic Class Method (08/09/2009)")); + insert(new GuidelineMapping("404", "1981", "Nº 404: Acute Dermal Irritation/Corrosion (12/05/1981)")); + insert(new GuidelineMapping("404", "1992", "Nº 404: Acute Dermal Irritation/Corrosion (17/07/1992)")); + insert(new GuidelineMapping("404", "2002", "Nº 404: Acute Dermal Irritation/Corrosion (24/04/2002)")); + insert(new GuidelineMapping("404", "2015", "Nº 404: Acute Dermal Irritation/Corrosion (28/07/2015)")); + insert(new GuidelineMapping("405", "2017", "Nº 405: Acute Eye Irritation/Corrosion (09/10/2017)")); + insert(new GuidelineMapping("405", "2012", "Nº 405: Acute Eye Irritation/Corrosion (02/10/2012)")); + insert(new GuidelineMapping("405", 
"2002", "Nº 405: Acute Eye Irritation/Corrosion (24/04/2002)")); + insert(new GuidelineMapping("405", "1987", "Nº 405: Acute Eye Irritation/Corrosion (24/02/1987)")); + insert(new GuidelineMapping("429", "2002", "Nº 429: Skin Sensitisation: Local Lymph Node Assay (24/04/2002)")); + insert(new GuidelineMapping("429", "2010", "Nº 429: Skin Sensitisation (23/07/2010)")); + insert(new GuidelineMapping("442A", "2018", "Nº 442A: Skin Sensitization (23/07/2018)")); + insert(new GuidelineMapping("442B", "2018", "Nº 442B: Skin Sensitization (27/06/2018)")); + insert(new GuidelineMapping("471", "1997", "Nº 471: Bacterial Reverse Mutation Test (21/07/1997)")); + insert(new GuidelineMapping("471", "2020", "Nº 471: Bacterial Reverse Mutation Test (26/06/2020)")); + insert(new GuidelineMapping("406", "1992", "Nº 406: Skin Sensitisation (1992)")); + insert(new GuidelineMapping("428", "2004", "Nº 428: Split-Thickness Skin test (2004)")); + insert(new GuidelineMapping("438", "2018", "Nº 438: Eye Irritation (26/06/2018)")); + insert(new GuidelineMapping("439", "2019", "Nº 439: Skin Irritation (2019)")); + insert(new GuidelineMapping("474", "2016", "Nº 474: Micronucleus Bone Marrow Cells Rat (2016)")); + insert(new GuidelineMapping("487", "2016", "Nº 487: Micronucleus Human Lymphocytes (2016)")); + end + +rule "TestGuideline.0.1: match OECD number and year with guideline mappings" + salience 1 + when + not Component(name == "Test_Guidelines_1") + GuidelineMapping($year: year, $number: number, $guideline: guideline) + $guidelineNumber: Entity(type == "oecd_guideline_number", value == $number) + $guidelineYear: Entity(type == "oecd_guideline_year", value == $year) + then + componentCreationService.create( + "TestGuideline.0.1", + "Test_Guidelines_1", + $guideline, + "OECD Number and guideline year mapped!", + List.of($guidelineNumber, $guidelineYear) + ); + end + +rule "TestGuideline.1.0: no guideline mapping found" + when + not Component(name == "Test_Guidelines_1") + $guideLine: 
Entity(type == "oecd_guideline") + then + componentCreationService.create("TestGuideline.1.0", "Test_Guidelines_1", $guideLine.getValue(), "No Mapping for OECD number and year found, using fallback instead!", List.of($guideLine)); + end + +rule "TestGuideline.2.0: All values of EPA guideline and EC guidelines" + when + $guidelines: List() from collect (Entity(type == "epa_guideline" || type == "ec_guideline")) + then + componentCreationService.joining("TestGuideline.2.0", "Test_Guidelines_2", $guidelines); + end + + +rule "StartDate.0.0: All experimental start dates converted to dd/MM/yyyy" + when + $startDates: List() from collect (Entity(type == "experimental_start_date")) + then + componentCreationService.convertDates("StartDate.0.0", "Experimental_Starting_Date", $startDates); + end + + +rule "CompletionDate.0.0: All experimental end dates converted to dd/MM/yyyy" + when + $endDates: List() from collect (Entity(type == "experimental_end_date")) + then + componentCreationService.convertDates("CompletionDate.0.0", "Experimental_Completion_Date", $endDates); + end + + +rule "AnalysisCertificate.0.0: Unique values of certificate of analysis batch identification" + when + $batchNumbers: List() from collect (Entity(type == "batch_number")) + then + componentCreationService.joiningUnique("AnalysisCertificate.0.0", "Certificate_of_Analysis_Batch_Identification", $batchNumbers); + end + +rule "StudyConclusion.0.0: Study conclusion in first found section" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $studyConclusions: List() from collect(Entity(type == "study_conclusion")) + then + componentCreationService.joiningFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $studyConclusions, " "); + end + +rule "GuidelineDeviation.0.0: Guideline deviation as sentences" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", 
"429", "436", "471") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $guidelineDeviations: List() from collect (Entity(type == "guideline_deviation")) + then + componentCreationService.joining("GuidelineDeviation.0.0", "Deviation_from_the_Guideline", $guidelineDeviations, "\n"); + end + +rule "Species.0.0: First found species" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $species: List() from collect (Entity(type == "species")) + then + componentCreationService.firstOrElse("Species.0.0", "Species", $species, ""); + end + +rule "Strain.0.0: First found strain" + when + $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $strain: List() from collect (Entity(type == "strain")) + then + componentCreationService.firstOrElse("Strain.0.0", "Strain", $strain, ""); + end + +rule "Conclusion.0.0: Unique values of Conclusion LD50" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List() from collect (Entity(type == "ld50_value")) + then + componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $conclusions); + end + +rule "Conclusion.1.0: Greater than found" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List(!isEmpty()) from collect (Entity(type == "ld50_greater")) + then + componentCreationService.create("Conclusion.1.0", "Conclusion_LD50_Greater_than", "Greater than", "Entity of type 'ld50_greater' found", $conclusions); + end + +rule "Conclusion.1.1: Greater than not found" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + 
not Entity(type == "ld50_greater") + then + componentCreationService.create("Conclusion.1.1", "Conclusion_LD50_Greater_than", "", "No entity of type 'ld50_greater' found"); + end + +rule "Conclusion.2.0: Minimum confidence as unique values" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List() from collect (Entity(type == "confidence_minimal")) + then + componentCreationService.joiningUnique("Conclusion.2.0", "Conclusion_Minimum_Confidence", $conclusions); + end + +rule "Conclusion.3.0: Maximum confidence as unique values" + when + $oecdNumber: String() from List.of("402", "403", "425", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $conclusions: List() from collect (Entity(type == "confidence_maximal")) + then + componentCreationService.joiningUnique("Conclusion.3.0", "Conclusion_Maximum_Confidence", $conclusions); + end + +rule "Necropsy.0.0: Necropsy findings from longest section" + when + FileAttribute(label == "OECD Number", value == "402") + $necropsies: List() from collect (Entity(type == "necropsy_findings")) + then + componentCreationService.joiningFromLongestSectionOnly("Necropsy.0.0", "Necropsy_Findings", $necropsies, " "); + end + +rule "Necropsy.0.1: Necropsy findings joined with \n" + when + FileAttribute(label == "OECD Number", value == "403" || value == "436") + $necropsies: List() from collect (Entity(type == "necropsy_findings")) + then + componentCreationService.joining("Necropsy.0.1", "Necropsy_Findings", $necropsies, "\n"); + end + +rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block" + when + FileAttribute(label == "OECD Number", value == "402") + $dosages: List() from collect (Entity(type == "doses_(mg_kg_bw)")) + then + componentCreationService.joining("Necropsy.1.0", "Doses_mg_per_kg_bw", $dosages, " "); + end + +rule "Necropsy.2.0: Conducted with 4 hours of exposure as one block" + when + $oecdNumber: 
String() from List.of("403", "436") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $exposures: List() from collect (Entity(type == "4h_exposure")) + then + componentCreationService.joining("Necropsy.2.0", "Conducted_with_4_Hours_of_Exposure", $exposures, " "); + end + +rule "StudyDesign.0.0: Study design as one block" + when + $oecdNumber: String() from List.of("404", "405", "429", "406", "428", "438", "439", "474", "487") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $studyDesigns: List() from collect (Entity(type == "study_design")) + then + componentCreationService.joining("StudyDesign.0.0", "Study_Design", $studyDesigns, " "); + end + +rule "Results.0.0: Results and conclusions as joined values" + when + $oecdNumber: String() from List.of("406", "428", "438", "439", "474", "487") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "results_and_conclusion")) + then + componentCreationService.joining("Results.0.0", "Results_and_Conclusions", $results, " "); + end + +rule "WeightBehavior.0.0: Weight change behavior as sentences" + when + FileAttribute(label == "OECD Number", value == "402") + $weightChanges: List() from collect (Entity(type == "weight_behavior_changes")) + then + componentCreationService.joining("WeightBehavior.0.0", "Weight_Behavior_Changes", $weightChanges, "\n"); + end + +rule "MortalityStatement.0.0: Mortality statements as one block" + when + FileAttribute(label == "OECD Number", value == "402") + $mortalityStatements: List() from collect (Entity(type == "mortality_statement")) + then + componentCreationService.joining("MortalityStatement.0.0", "Mortality_Statement", $mortalityStatements, " "); + end + +rule "ClinicalObservations.0.0: Clinical observations as sentences" + when + FileAttribute(label == "OECD Number", value == "403") + $observations: List() from collect (Entity(type == "clinical_observations")) + then + 
componentCreationService.joining("MortalityStatement.0.0", "Clinical_Observations", $observations, "\n"); + end + +rule "BodyWeight.0.0: Bodyweight changes as sentences" + when + FileAttribute(label == "OECD Number", value == "403") + $weightChanges: List() from collect (Entity(type == "bodyweight_changes")) + then + componentCreationService.joining("BodyWeight.0.0", "Body_Weight_Changes", $weightChanges, "\n"); + end + +rule "Detailing.0.0: Detailing of reported changes as one block" + when + $oecdNumber: String() from List.of("404", "405") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $detailings: List() from collect (Entity(type == "detailing")) + then + componentCreationService.joining("Detailing.0.0", "Detailing_of_Reported_Changes", $detailings, " "); + end + +rule "Sex.0.0: Male sex found" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $males: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "male" || value.toLowerCase() == "males"))) + then + componentCreationService.create("Sex.0.0", "Sex", "male", "male sex found", $males); + end + +rule "Sex.1.0: Female sex found" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $females: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "female" || value.toLowerCase() == "females"))) + then + componentCreationService.create("Sex.0.0", "Sex", "female", "female sex found", $females); + end + +rule "NumberOfAnimals.0.0: Number of animals found" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $numberOfAnimals: Entity(type == "number_of_animals") + then + componentCreationService.create("NumberOfAnimals.0.0", "Number_of_Animals", $numberOfAnimals.getValue(), "Number of animals found directly", $numberOfAnimals); + end + +rule 
"NumberOfAnimals.1.0: Count unique occurences of animals" + when + $oecdNumber: String() from List.of("405", "429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + not Entity(type == "number_of_animals") + $animals: List() from collect (Entity(type == "animal_number")) + then + componentCreationService.uniqueValueCount("NumberOfAnimals.1.0", "Number_of_Animals", $animals); + end + +rule "ClinicalSigns.0.0: Clinical signs as sentences" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $clinicalSigns: List() from collect (Entity(type == "clinical_signs")) + then + componentCreationService.joining("ClinicalSigns.0.0", "Clinical_Signs", $clinicalSigns, "\n"); + end + +rule "DoseMortality.0.0: Dose mortality joined with dose from same table row" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $doseMortalities: List() from collect (Entity(type == "dose_mortality" || type == "dose_mortality_dose")) + then + componentCreationService.joiningFromSameTableRow("DoseMortality.0.0", "Dose_Mortality", $doseMortalities); + end + +rule "Mortality.0.0: Mortality as one block" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $mortalities: List() from collect (Entity(type == "mortality")) + then + componentCreationService.joining("Mortality.0.0", "Mortality", $mortalities, " "); + end + +rule "Dosages.0.0: First found value of Dosages" + when + $oecdNumber: String() from List.of("425") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $mortalities: List() from collect (Entity(type == "dosages")) + then + componentCreationService.firstOrElse("Dosages.0.0", "Dosages", $mortalities, ""); + end + +rule "PrelimResults.0.0: Preliminary test results as sentences" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == 
$oecdNumber) + $results: List() from collect (Entity(type == "preliminary_test_results")) + then + componentCreationService.joining("PrelimResults.0.0", "Preliminary_Test_Results", $results, "\n"); + end + +rule "TestResults.0.0: Test results as one block" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "test_results")) + then + componentCreationService.joining("TestResults.0.0", "Test_Results", $results, " "); + end + +rule "PositiveControl.0.0: Was the definitive study conducted with positive control" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "positive_control")) + then + componentCreationService.joining("PositiveControl.0.0", "Was_the_definitive_study_conducted_with_positive_control", $results, " "); + end + +rule "MainResults.0.0: Results from main study as one block" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List() from collect (Entity(type == "results_(main_study)")) + then + componentCreationService.joining("MainResults.0.0", "Results_Main_Study", $results, " "); + end + +rule "UsedApproach.0.0: Used approach found and mapped to 'Group'" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + $results: List(!isEmpty()) from collect (Entity(type == "approach_used")) + then + componentCreationService.create("UsedApproach.0.0", "What_was_the_approach_used", "Group", "'Group' when approach used is present, else 'Individual'", $results); + end + +rule "UsedApproach.1.0: Used approach not found and thus 'Individual'" + when + $oecdNumber: String() from List.of("429") + FileAttribute(label == "OECD Number", value == $oecdNumber) + not Entity(type == "approach_used") + then + 
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'"); + end + +/* +rule "DefaultComponents.999.0: Create components for all unmapped entities." + salience -999 + when + $allEntities: List(!isEmpty()) from collect (Entity()) + then + componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $allEntities); + end +*/ + +//------------------------------------ Component merging rules ------------------------------------ +/* +rule "X.0.0: merge duplicate component references" + when + $first: Component() + $duplicate: Component(this != $first, name == $first.name, value == $first.value) + then + $first.getReferences().addAll($duplicate.getReferences()); + retract($duplicate); + end +*/ \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/TableDemo/Table_examples.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/TableDemo/Table_examples.pdf new file mode 100644 index 00000000..e6dea477 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/TableDemo/Table_examples.pdf differ