diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java index 4394d8a8..cce55740 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ComponentLogCreatorService.java @@ -1,7 +1,9 @@ package com.iqser.red.service.redaction.v1.server.service; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; -import java.util.stream.Collectors; +import java.util.Map; import org.springframework.stereotype.Service; @@ -12,6 +14,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.component.Entity; +import com.iqser.red.service.redaction.v1.server.service.document.ComponentComparator; import com.iqser.red.service.redaction.v1.server.service.document.EntityComparators; @Service @@ -19,8 +22,12 @@ public class ComponentLogCreatorService { public ComponentLog buildComponentLog(int analysisNumber, List components, long componentRulesVersion) { - List componentLogComponents = components.stream() - .collect(Collectors.groupingBy(Component::getName, Collectors.mapping(this::buildComponentLogEntry, Collectors.toList()))) + Map> map = new HashMap<>(); + components.stream().sorted(ComponentComparator.first()).forEach(component -> { + ComponentLogEntryValue componentLogEntryValue = buildComponentLogEntry(component); + map.computeIfAbsent(component.getName(), k -> new ArrayList<>()).add(componentLogEntryValue); + }); + List componentLogComponents = map .entrySet() .stream().map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue())) .toList(); @@ -34,7 +41,7 @@ public class ComponentLogCreatorService { .value(component.getValue()).originalValue(component.getValue()) .componentRuleId(component.getMatchedRule().toString()) .valueDescription(component.getValueDescription()) - .componentLogEntityReferences(toComponentEntityReferences(component.getReferences().stream().sorted(EntityComparators.start()).toList())) + .componentLogEntityReferences(toComponentEntityReferences(component.getReferences().stream().sorted(EntityComparators.first()).toList())) .build(); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentComparator.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentComparator.java new file mode 100644 index 00000000..c2c4ecc9 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentComparator.java @@ -0,0 +1,33 @@ +package com.iqser.red.service.redaction.v1.server.service.document; + +import java.util.Comparator; + +import com.iqser.red.service.redaction.v1.server.model.component.Component; + +public class ComponentComparator implements Comparator { + + public static ComponentComparator first() { + + return new ComponentComparator(); + } + + + @Override + public int compare(Component component1, Component component2) { + + var firstEntity1 = component1.getReferences().stream().min(EntityComparators.first()); + var firstEntity2 = component2.getReferences().stream().min(EntityComparators.first()); + if (firstEntity1.isEmpty() && firstEntity2.isEmpty()) { + return 0; + } + if (firstEntity1.isEmpty() && firstEntity2.isPresent()) { + return -1; + } + if (firstEntity1.isPresent() && firstEntity2.isEmpty()) { + return 1; + } + + return new EntityComparators.FirstEntity().compare(firstEntity1.get(), firstEntity2.get()); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java index 56f069d4..59410a83 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ComponentCreationService.java @@ -37,19 +37,20 @@ public class ComponentCreationService { Set referencedEntities = new HashSet<>(); - /** - * Finds the first value from the collection of entities and creates a component from it. If no value is found, the fallback value is used instead. - * - * @param ruleIdentifier the identifier for the rule - * @param name the name of the operation - * @param entities the collection of entities to search for the first value - * @param fallback the value to be returned if no value is found in the collection - */ - public void firstOrElse(String ruleIdentifier, String name, Collection entities, String fallback) { + private static List findEntitiesFromLongestSection(Collection entities) { - String valueDescription = String.format("First found value or else '%s'", fallback); - String value = entities.stream().min(EntityComparators.start()).map(Entity::getValue).orElse(fallback); - create(ruleIdentifier, name, value, valueDescription, entities); + var entitiesBySection = entities.stream().collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent())); + Optional longestSection = entitiesBySection.entrySet() + .stream() + .sorted(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities).reversed()) + .map(Map.Entry::getKey) + .findFirst(); + + if (longestSection.isEmpty()) { + return Collections.emptyList(); + } + + return entitiesBySection.get(longestSection.get()); } @@ -67,20 +68,27 @@ public class ComponentCreationService { /** - * Joins entity values, and creates a component from the result. + * Finds the first value from the collection of entities and creates a component from it. If no value is found, the fallback value is used instead. * - * @param ruleIdentifier The identifier of the rule. - * @param name The name of the entity. - * @param entities The collection of entities to process. + * @param ruleIdentifier the identifier for the rule + * @param name the name of the operation + * @param entities the collection of entities to search for the first value + * @param fallback the value to be returned if no value is found in the collection */ - public void joining(String ruleIdentifier, String name, Collection entities, String delimiter) { + public void firstOrElse(String ruleIdentifier, String name, Collection entities, String fallback) { - String valueDescription = String.format("Joining all values with '%s'", delimiter); - String value = entities.stream().sorted(EntityComparators.start()).map(Entity::getValue).collect(Collectors.joining(delimiter)); + String valueDescription = String.format("First found value of type %s or else '%s'", joinTypes(entities), fallback); + String value = entities.stream().min(EntityComparators.first()).map(Entity::getValue).orElse(fallback); create(ruleIdentifier, name, value, valueDescription, entities); } + private static String joinTypes(Collection entities) { + + return entities.stream().map(Entity::getType).distinct().collect(Collectors.joining(", ")); + } + + /** * Creates a new component with the given parameters and inserts it into the kieSession. * @@ -166,17 +174,16 @@ public class ComponentCreationService { /** - * Joins all unique values from a collection of entities into a single string using a specified delimiter and creates a component from the result. + * Joins entity values, and creates a component from the result. * - * @param ruleIdentifier the identifier of the rule - * @param name the name of the joining operation - * @param entities the collection of entities - * @param delimiter the delimiter to use for joining the values + * @param ruleIdentifier The identifier of the rule. + * @param name The name of the entity. + * @param entities The collection of entities to process. */ - public void joiningUnique(String ruleIdentifier, String name, Collection entities, String delimiter) { + public void joining(String ruleIdentifier, String name, Collection entities, String delimiter) { - String valueDescription = String.format("Joining all values with '%s'", delimiter); - String value = entities.stream().sorted(EntityComparators.start()).map(Entity::getValue).distinct().collect(Collectors.joining(delimiter)); + String valueDescription = String.format("Joining all values of type %s with '%s'", joinTypes(entities), delimiter); + String value = entities.stream().sorted(EntityComparators.first()).map(Entity::getValue).collect(Collectors.joining(delimiter)); create(ruleIdentifier, name, value, valueDescription, entities); } @@ -208,18 +215,19 @@ public class ComponentCreationService { } - private static List findEntitiesFromLongestSection(Collection entities) { + /** + * Joins all unique values from a collection of entities into a single string using a specified delimiter and creates a component from the result. + * + * @param ruleIdentifier the identifier of the rule + * @param name the name of the joining operation + * @param entities the collection of entities + * @param delimiter the delimiter to use for joining the values + */ + public void joiningUnique(String ruleIdentifier, String name, Collection entities, String delimiter) { - var entitiesBySection = entities.stream().collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent())); - Optional longestSection = entitiesBySection.entrySet() - .stream().sorted(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities).reversed()).map(Map.Entry::getKey) - .findFirst(); - - if (longestSection.isEmpty()) { - return Collections.emptyList(); - } - - return entitiesBySection.get(longestSection.get()); + String valueDescription = String.format("Joining all unique values of type %s with '%s'", joinTypes(entities), delimiter); + String value = entities.stream().sorted(EntityComparators.first()).map(Entity::getValue).distinct().collect(Collectors.joining(delimiter)); + create(ruleIdentifier, name, value, valueDescription, entities); } @@ -297,7 +305,7 @@ public class ComponentCreationService { if (entities.isEmpty()) { return; } - for (Entity entity : entities) { + entities.stream().sorted(EntityComparators.first()).forEach(entity -> { BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.ENGLISH); iterator.setText(entity.getValue()); int start = iterator.first(); @@ -308,7 +316,7 @@ public class ComponentCreationService { String.format("Values of type '%s' as sentences", entity.getType()), entity); } - } + }); } @@ -346,8 +354,7 @@ public class ComponentCreationService { */ public void createComponentsForUnMappedEntities(String ruleIdentifier, Collection entities) { - entities.stream() - .filter(entity -> !referencedEntities.contains(entity)) + entities.stream().filter(entity -> !referencedEntities.contains(entity)).sorted(EntityComparators.first()) .forEach(entity -> create(ruleIdentifier, entity.getType(), entity.getValue(), "Unmapped Entity", List.of(entity))); } @@ -375,7 +382,7 @@ public class ComponentCreationService { */ public void convertDates(String ruleIdentifier, String name, Collection entities, String resultFormat) { - String valueDescription = String.format("Convert values of type to %s joined with ', '", resultFormat); + String valueDescription = String.format("Convert values of type '%s' to %s joined with ', '", joinTypes(entities), resultFormat); List unparsedDates = new LinkedList<>(); List dates = new LinkedList<>(); @@ -407,10 +414,9 @@ public class ComponentCreationService { */ public void joiningFromSameTableRow(String ruleIdentifier, String name, Collection entities) { - String types = entities.stream().map(Entity::getType).distinct().collect(Collectors.joining()); + String types = entities.stream().map(Entity::getType).sorted(Comparator.reverseOrder()).distinct().collect(Collectors.joining(", ")); String valueDescription = String.format("Combine values of %s that are in same table row", types); - Map, List> entitiesPerTable = entities.stream().collect(Collectors.groupingBy(this::getFirstTable)); - entitiesPerTable.forEach((optionalTable, groupedEntities) -> { + entities.stream().collect(Collectors.groupingBy(this::getFirstTable)).forEach((optionalTable, groupedEntities) -> { if (optionalTable.isEmpty()) { groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity)); } @@ -422,9 +428,13 @@ public class ComponentCreationService { groupedEntities.stream() .filter(entity -> entity.getContainingNode() instanceof TableCell) .collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow())) - .forEach((row, entitiesInSameRow) -> create(ruleIdentifier, + .entrySet() + .stream() + .sorted(Comparator.comparingInt(Map.Entry::getKey)) + .map(Map.Entry::getValue) + .forEach(entitiesInSameRow -> create(ruleIdentifier, name, - entities.stream().map(Entity::getValue).collect(Collectors.joining(", ")), + entitiesInSameRow.stream().sorted(Comparator.comparing(Entity::getType).reversed()).map(Entity::getValue).collect(Collectors.joining(", ")), valueDescription, entitiesInSameRow)); }); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityComparators.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityComparators.java index 7364bcdd..157fbfd6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityComparators.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityComparators.java @@ -6,7 +6,12 @@ import com.iqser.red.service.redaction.v1.server.model.component.Entity; public abstract class EntityComparators implements Comparator { - private static class LongestEntity implements Comparator { + public static Comparator first() { + + return new FirstEntity(); + } + + public static class LongestEntity implements Comparator { @Override public int compare(Entity Entity, Entity otherEntity) { @@ -16,26 +21,20 @@ public abstract class EntityComparators implements Comparator { } - private static class FirstEntity implements Comparator { - - @Override - public int compare(Entity Entity, Entity otherEntity) { - - return Integer.compare(Entity.getStartOffset(), otherEntity.getStartOffset()); - } - - } - public static Comparator length() { return new LongestEntity(); } + public static class FirstEntity implements Comparator { - public static Comparator start() { + @Override + public int compare(Entity Entity, Entity otherEntity) { + + return Integer.compare(Entity.getStartOffset(), otherEntity.getStartOffset()); + } - return new FirstEntity(); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index 54d87fd5..4ff0f3f2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -21,6 +21,7 @@ import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.component.Entity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.service.document.ComponentComparator; import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService; import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException; @@ -70,7 +71,7 @@ public class ComponentDroolsExecutionService { } List resultingFileAttributes = getFileAttributes(kieSession); - List components = getComponents(kieSession); + List components = getComponents(kieSession).stream().sorted(ComponentComparator.first()).toList(); kieSession.dispose(); return components; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index d79234ce..237f58a8 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -48,7 +48,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { // @Disabled public void titleExtraction() throws IOException { - AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/_000008810-0.2.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A8591B/15-Curacron_ToxicidadeAgudaOral.pdf"); // AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).pdf", // "files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).TABLES.json"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java index 061ccc26..c51cadd6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesEnd2EndTest.java @@ -78,6 +78,7 @@ import lombok.SneakyThrows; public class ManualChangesEnd2EndTest extends AbstractRedactionIntegrationTest { private static final String RULES = loadFromClassPath("drools/rules.drl"); + private static final String DM_RULES = loadFromClassPath("drools/documine_flora.drl"); @Autowired private EntityEnrichmentService entityEnrichmentService; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java index b2ad45db..52e380d0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/manualchanges/ManualChangesIntegrationTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.wildfly.common.Assert.assertFalse; import java.awt.geom.Rectangle2D; +import java.io.FileOutputStream; import java.time.OffsetDateTime; import java.util.Collection; import java.util.Comparator; @@ -27,22 +28,30 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; +import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest; +import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException; +import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; + +import lombok.SneakyThrows; @Import(ManualChangesIntegrationTest.TestConfiguration.class) public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { @@ -249,6 +258,7 @@ public class ManualChangesIntegrationTest extends BuildDocumentIntegrationTest { assertFalse(entity.removed()); } + private void assertRectanglesAlmostEqual(Collection rects1, Collection rects2) { if (rects1.stream().allMatch(rect1 -> rects2.stream().anyMatch(rect2 -> rectanglesAlmostEqual(rect1, rect2)))) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/EntityComparatorsTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/EntityComparatorsTest.java new file mode 100644 index 00000000..39ae7fcd --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/service/document/EntityComparatorsTest.java @@ -0,0 +1,23 @@ +package com.iqser.red.service.redaction.v1.server.service.document; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.stream.Stream; + +import org.junit.jupiter.api.Test; + +import com.iqser.red.service.redaction.v1.server.model.component.Entity; + +class EntityComparatorsTest { + + @Test + public void testFirstEntity() { + + Entity entity1 = Entity.builder().startOffset(0).build(); + Entity entity2 = Entity.builder().startOffset(2).build(); + Entity entity3 = Entity.builder().startOffset(20).build(); + assertEquals(entity1, Stream.of(entity2, entity3, entity1).min(EntityComparators.first()).orElseThrow()); + + } + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 326ec246..a1e8a1c9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -64,11 +64,8 @@ global Dictionary dictionary query "getFileAttributes" $fileAttribute: FileAttribute() end - //--------------------------------------------------------------------------- - - rule "H.0.0: retract table of contents page" when $page: Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) @@ -78,46 +75,38 @@ rule "H.0.0: retract table of contents page" end -rule "H.0.1: Ignore Table of Contents" +rule "H.1.0: Ignore Table of Contents" salience 10 when $tocHeadline: Headline(containsString("CONTENTS")) - + $page: Page() from $tocHeadline.getParent().getPages() + $node: SemanticNode(this != $tocHeadline, getType() != NodeType.IMAGE, onPage($page.getNumber()), !onPage($page.getNumber() -1)) then - $tocHeadline.getParent().getPages() - .forEach(page -> page.getMainBody().stream() - .filter(node -> !node.getType().equals(NodeType.IMAGE)) - .filter(node -> node.getPages().stream().noneMatch(nodePage -> nodePage.getNumber() < page.getNumber())) - .forEach(node -> retract(node)) - ); + retract($node); end - -/* -rule "H.0.0: Show headlines" +// Rule unit: MAN.0 +rule "H.2.0: Show headlines" when $headline: Headline() - $headline: Headline() then - entityCreationService.bySemanticNode($headline, "headline", EntityType.RECOMMENDATION); + entityCreationService.bySemanticNode($headline, "headline", EntityType.ENTITY); end -*/ -rule "H.0.2: Study Type File Attribute" + +rule "H.3.0: Study Type File Attribute" when - not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","408","414","425","429","436","438","439","471","487")) + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) $section: Section( - onPage(1) + (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):")) ,(containsString("OECD") || containsString("EPA") || containsString("OPPTS")) ) then Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $section.getTextBlock()), RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()), - RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock()), - RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline, Method No. (\\d{3})", 1, $section.getTextBlock()) - ).flatMap(Collection::stream).findFirst() - .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock())).flatMap(Collection::stream).findFirst() + .map(textRange -> $section.getTextBlock().subSequence(textRange).toString()) .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) .ifPresent(fileAttribute -> insert(fileAttribute)); end @@ -126,7 +115,11 @@ rule "H.0.2: Study Type File Attribute" rule "DOC.1.0: Guidelines" when $section: Section( - onPage(1) + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) && ( containsString("OECD") || containsString("EPA") @@ -152,14 +145,9 @@ rule "DOC.1.0: Guidelines" entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") ); -// Examples found in PoC 1 -// entityCreationService.byRegex("((OECD Guidelines for Testing of Chemicals, Procedure)|(OECD Guidelines for the Testing of Chemicals No\\.)|(OECD Test Guideline)|(OECD \\[Test Guideline, Number)) \\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> -// entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") -// ); entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).forEach(entity -> entity.apply("DOC.1.0", "EPA Guideline found", "n-a") ); -// new approach OECD Guideline entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> entity.apply("DOC.1.0", "OECD Guideline no. found", "n-a") ); @@ -175,7 +163,6 @@ rule "DOC.1.0: Guidelines" entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $section).forEach(entity -> entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") ); -// missing OECD guideline rules for RFP demo file entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> entity.apply("DOC.1.0", "OECD Guideline number found", "n-a") ); @@ -185,19 +172,6 @@ rule "DOC.1.0: Guidelines" end -rule "DOC.1.1: Guidelines" - when - $headline: Headline( - onPage(1), - containsString("OECD") - ) - then - entityCreationService.byRegex("(OECD (No\\.? )?(\\d{3})( \\(\\d{4}\\))?)", "oecd_guideline", EntityType.ENTITY,1, $headline).forEach(entity -> - entity.apply("DOC.1.1", "OECD Guideline found", "n-a") - ); - end - - rule "DOC.1.2: Guidelines" when $section: Section( @@ -247,105 +221,654 @@ rule "DOC.1.3: Guidelines" ) then $section.getEntitiesOfType(List.of("oecd_guideline", "ec_guideline", "epa_guideline")).forEach(entity -> { - entity.remove("DOC.1.3", "removed by Guidelines rules"); + entity.removeFromGraph(); retract(entity); }); end +rule "DOC.2.0: Report number" + when + $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) + then + entityCreationService.lineAfterString("Report Number:", "report_number", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.2.0", "Report number found", "n-a"); + }); + end +rule "DOC.3.0: Experimental Starting Date" + when + $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II. Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental start date", + "Experimental start date:", + "Experimental Starting Date", + "Experimental Starting Date:", + "Experimental starting date", + "Experimental starting date:", + "Experimental Start Date", + "Experimental Start Date:", + "Experimental I. Starting Date:", + "Experimental II. Starting Date:"), "experimental_start_date", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.3.0", "Experimental start date found", "n-a"); + }); + end - -rule "DOC.3.2: Experimental Completion Date" - salience 10 - when - $section: Section(onPage(1) && (containsString("STUDY COMPLETED ON") || containsString("STUDY COMPLETION DATE") || containsString("Report completion date") || containsString("Date of Report") || containsString("AMENDMENT COMPLETION DATE") || containsString("AMENDMENT COMPLETED ON"))) - - then - entityCreationService.byRegex("STUDY COMPLETED ON (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.3.2", "Experimental end date found", "n-a"); - }); - entityCreationService.byRegex("STUDY COMPLETION DATE (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.3.2", "Experimental end date found", "n-a"); - }); - entityCreationService.byRegex("Report completion date (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.3.2", "Experimental end date found", "n-a"); - }); - entityCreationService.byRegex("Date of Report (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.3.2", "Experimental end date found", "n-a"); - }); - entityCreationService.byRegex("AMENDMENT COMPLETION DATE (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.3.2", "Experimental end date found", "n-a"); - }); - entityCreationService.byRegex("AMENDMENT COMPLETED ON (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.3.2", "Experimental end date found", "n-a"); - }); - end +rule "DOC.4.0: Experimental Completion Date" + when + $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental termination date", + "Experimental termination date:", + "Experimental Completion Date", + "Experimental Completion Date:", + "Experimental completion date", + "Experimental completion date:", + "Experimental Termination Date", + "Experimental Termination Date:"), "experimental_end_date", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.4.0", "Experimental end date found", "n-a"); + }); + end - - - - // hide all skipped species and strains except in the relevant sections - rule "DOC.4.2: Species" + rule "DOC.5.0: Ignore species and strain in irrelevant study types" salience 1 when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) + $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) + then + $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { + entity.removeFromGraph(); + retract(entity); + }); + end + + + rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" + salience 1 + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) $section: Section( (hasEntitiesOfType("species") || hasEntitiesOfType("strain")) && !( anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("Species and strain") + || anyHeadlineContainsStringIgnoreCase("animals") || anyHeadlineContainsStringIgnoreCase("specification") ) ) then $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { - entity.remove("DOC.4.2","n-a"); + entity.removeFromGraph(); retract(entity); }); end -rule "DOC.4.3: Species" +rule "DOC.5.2: Species" when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) $section: Section(hasEntitiesOfType("species")) then $section.getEntitiesOfType("species").forEach(entity -> { - entity.apply("DOC.4.3", "Species found.", "n-a"); + entity.apply("DOC.5.2", "Species found.", "n-a"); entity.setValue(entity.getValue().toLowerCase()); }); end -rule "DOC.5.0: Strain" +rule "DOC.5.3: Strain" when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) $section: Section( hasEntitiesOfType("species") && hasEntitiesOfType("strain") && ( anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("Species and strain") + || anyHeadlineContainsStringIgnoreCase("animals") || anyHeadlineContainsStringIgnoreCase("specification") ) ) then $section.getEntitiesOfType("strain").forEach(entity -> { - entity.apply("DOC.5.0", "Strain found.", "n-a"); + entity.apply("DOC.5.3", "Strain found.", "n-a"); }); end - - -rule "DOC.35.0: Sex" +rule "DOC.6.0: study title by document structure" when + $table: Table(onPage(1), + (containsString("Final Report") || containsString("SPL")), + numberOfRows == 1, + numberOfCols == 1) + then + entityCreationService.bySemanticNode($table.getCell(0, 0).streamChildren().toList().get(1), "title", EntityType.ENTITY).ifPresent(entity -> { + entity.apply("DOC.6.0", "Study title found", "n-a"); + }); + end + + +rule "DOC.6.1: study title" + when + $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) + then + entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $table).findFirst().ifPresent(entity -> { + entity.apply("DOC.6.1", "Title found", "n-a"); + }); + end + + +rule "DOC.6.2: study title" + when + not Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) + $section: Section(onPage(1), (containsString("Final Report") || containsString("SPL"))) + then + entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.6.2", "Title found", "n-a"); + }); + end + + + +rule "DOC.7.0: Performing Laboratory (Name)" + when + $section: Section(containsString("PERFORMING LABORATORY:")) + then + entityCreationService.lineAfterString("PERFORMING LABORATORY:", "laboratory_name", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.7.0", "Performing Laboratory found", "n-a"); + }); + end + + + rule "DOC.7.1: Performing Laboratory (Country)" + when + nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) + $section: Section(containsString("PERFORMING LABORATORY:")) + then + nerEntities.streamEntitiesOfType("COUNTRY") + .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) + .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) + .forEach(entity -> { + entity.apply("DOC.7.1", "Performing Laboratory found", "n-a"); + insert(entity); + }); + end + + +rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" + when + $section: Section( + (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")) + && (containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) + ) + then + $section.getEntitiesOfType("laboratory_country").forEach(entity -> { + entity.apply("DOC.7.2", "Performing laboratory country dictionary entry found.", "n-a"); + }); + $section.getEntitiesOfType("laboratory_name").forEach(entity -> { + entity.apply("DOC.7.2", "Performing laboratory name dictionary entry found.", "n-a"); + }); + end + + +rule "DOC.7.3: Performing Laboratory (Country) from dict" + when + $section: Section( + (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")) + && !(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) + ) + then + $section.getEntitiesOfType(List.of("laboratory_country", "laboratory_name")).forEach(entity -> { + entity.removeFromGraph(); + retract(entity); + }); + end + + +rule "DOC.8.0: GLP Study" + when + $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") + || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") + || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) + || containsString("GLP Certificate") + || containsString("GLP Certificates") + || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") + || containsString("Good Laboratory Practice Certificate") + || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION")) + then + entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { + entity.apply("DOC.8.0", "GLP Study found", "n-a"); + }); + end + + +rule "DOC.9.0: Batch number from CoA" + when + $section: Section( + ( + anyHeadlineContainsString("Analytical Report") + || anyHeadlineContainsStringIgnoreCase("Certificate of Analysis") + || containsStringIgnoreCase("Certificate of Analysis") + ) + && ( + containsStringIgnoreCase("batch") + || containsStringIgnoreCase("bath") + || containsStringIgnoreCase("barch") + || containsStringIgnoreCase("bateb") + ) + && ( + containsStringIgnoreCase("identification") + || containsStringIgnoreCase("ldentitfication") + || containsStringIgnoreCase("wentification") + || containsStringIgnoreCase("mentification") + || containsStringIgnoreCase("kientification") + || containsStringIgnoreCase("reference number") + || containsStringIgnoreCase("test substance") + ) + ) + then + entityCreationService.lineAfterStrings(List.of("Batch Identification", + "(Batch Identification):", + "Bateb Identification", + "Batch Wentification", + "Batch Mentification", + "Batch Kientification", + "Barch Identification", + "Bath ldentitfication", + "Batch of test substance :"), "batch_number", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.9.0", "Batch number found in CoA", "n-a"); + }); + end + + +rule "DOC.9.1: Batch number" + when + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Test Substance") + || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") + || anyHeadlineContainsStringIgnoreCase("Test Item") + ) + && !( + anyHeadlineContainsString("component") + || anyHeadlineContainsString("reference") + || anyHeadlineContainsString("blank") + ) + && containsStringIgnoreCase("batch") + ) + then + Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, 1, $section), + entityCreationService.lineAfterStrings(List.of("Batch Identification", + "Batch number:", + "Batch reference number:", + "Batch:", + "Batch/Lot number:", + "Batch (Lot) Number:", + "Batch Number:", + "Batch Nº:", + "Batch no:" + ), "batch_number", EntityType.ENTITY, $section)).flatMap(a -> a) + .forEach(entity -> { + entity.apply("DOC.9.1", "Batch number found", "n-a"); + }); + end + + +rule "DOC.9.2: Batch number" + when + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Test Substance") + || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") + || anyHeadlineContainsStringIgnoreCase("Test Item") + ) + && !( + anyHeadlineContainsString("component") + || anyHeadlineContainsString("reference") + || anyHeadlineContainsString("blank") + ) + && containsStringIgnoreCase("batch") + ) + $batchNumber: String() from List.of("Batch Identification", + "Batch number:", + "Batch reference number:", + "Batch:", + "Batch/Lot number:", + "Batch (Lot) Number:", + "Batch Number:", + "Batch Nº:", + "Batch no:") + $table: Table(containsStringIgnoreCase($batchNumber)) from $section.streamAllSubNodesOfType(NodeType.TABLE).toList() + then + entityCreationService.lineAfterStringAcrossColumnsIgnoreCase($batchNumber, "batch_number", EntityType.ENTITY, $table).forEach(entity -> { + entity.apply("DOC.9.2", "Batch number found", "n-a"); + }); + end + + + + +rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) + $section: Section( + (getHeadline().containsStringIgnoreCase("Conclusion") || anyHeadlineContainsStringIgnoreCase("Lethality")) + && (containsString("LD") || containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose")) + && ( + containsString("greater than") + || containsString("higher than") + || containsString("above") + || containsString("in excess") + || containsString("exceeds") + || containsString("was found to be") + || containsString("was calculated to be") + || containsString("estimated to be") + ) + ) + then + entityCreationService.byRegexIgnoreCase("(L[D|C]\\s?50|lethal concentration|lethal dose).{1,200}(greater than|considered to be above|in excess of|exceeds|higher than)", "ld50_greater", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.10.0", "LD50 greater than found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("\\b(?:(?:greater|higher) than|considered to be above|(?:was|is) (?:found|estimated) to be|was calculated to be|in excess of|exceeds|equal to)\\s?([\\d\\.]{1,6})\\s?mg\\/(?:kg|L)", "ld50_value", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.10.0", "LD50 value found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_minimal", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.10.0", "Minimal Confidence found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_maximal", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.10.0", "Maximal Confidence found", "n-a"); + }); + end + + +rule "DOC.11.0: Guideline Deviation" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + (getHeadline().containsStringIgnoreCase("General Information") || containsString("GENERAL INFORMATION")) + && (containsStringIgnoreCase("from the") || containsStringIgnoreCase("to the")) + ) + then + entityCreationService.betweenRegexes("(?:Deviations? from the [G|g]uidelines?)(?: and| or)?( the)?(?: Study Plan)?", "(?:(?:Deviations? from the Study Plan)|(?:Performing laboratory test)|(?:Other)|(?:Retention of [S|s]amples)|(?:Amendments? to Final Protocol))", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from Guidelines found", "n-a"); + }); + entityCreationService.betweenRegexes("(?:Deviations? (?:from|to)(?: the)? [S|s]tudy [P|p]lan)", "(?:Regulatory Guidelines)|(?:Other)|(?:Distribution of the report)|(?:Performing laboratory test)|(?:Distribution of the report)|(?:Retention of [S|s]amples)", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from Study Plan found", "n-a"); + }); + entityCreationService.betweenStrings("Deviations from the study plan", "Regulatory Guidelines", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("(?>Study plan adherence)(.{1,20}deviations.{1,20} to the study plan.{0,50}\\.)\\s", "guideline_deviation", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Guideline deviation found in text.", "n-a"); + }); + entityCreationService.betweenStringsIncludeEnd("Deviations from the study plan", "validity of the study.", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"); + }); + end + + +rule "DOC.11.1: Guideline Deviation in text" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + getHeadline().containsStringIgnoreCase("Introduction") + && containsStringIgnoreCase("deviations from the protocol") + ) + then + entityCreationService.byRegex("There were no deviations from the protocol.{1,100}\\.\\s", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.11.1", "Guideline deviation found in text.", "n-a"); + }); + end + + +rule "DOC.12.0: Clinical Signs" + when + FileAttribute(label == "OECD Number", value == "425") + $headline: Headline(containsAnyStringIgnoreCase("Clinical Signs", "Macroscopic Findings") && !containsString("TABLE") && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC FINDINGS")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "clinical_signs", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.12.0", "Clinical Signs found", "n-a")); + end + + +rule "DOC.13.0: Dosages" + when + FileAttribute(label == "OECD Number", value == "425") + $section: Section( + (anyHeadlineContainsStringIgnoreCase("Dosages") || anyHeadlineContainsStringIgnoreCase("Study Design")) + && !getHeadline().containsString("TABLE") + ) + then + entityCreationService.betweenStringsIncludeStartAndEnd("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.13.0", "Dosage found", "n-a"); + }); + entityCreationService.betweenStringsIncludeStartAndEnd("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.13.0", "Dosage found", "n-a"); + }); + entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.13.0", "Dosage found", "n-a"); + }); + end + + +rule "DOC.14.0: Mortality" + when + $headline: Headline(containsString("Mortality") && !containsString("TABLE")) + FileAttribute(label == "OECD Number", value == "425") + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.14.0", "Mortality found", "n-a")); + end + + +rule "DOC.15.0: Study Conclusion" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + getHeadline().containsStringIgnoreCase("Conclusion") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.15.0", "Study Conclusion found", "n-a")); + end + + +rule "DOC.16.0: Weight Behavior Changes" + when + FileAttribute(label == "OECD Number", value == "402") + $section: Section( + getHeadline().containsString("Results") + && ( + containsString("body weight") + || containsString("body weights") + || containsString("bodyweight") + || containsString("bodyweights") + ) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.16.0", "Weight behavior changes found", "n-a")); + end + + +rule "DOC.17.0: Necropsy findings" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Necropsy") + || getHeadline().containsStringIgnoreCase("Macroscopic Findings") + || getHeadline().containsStringIgnoreCase("Macroscopic examination") + ) + && !getHeadline().containsStringIgnoreCase("Table") + && !getHeadline().containsStringIgnoreCase("Appendix") + && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC FINDINGS") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) + .forEach( entity -> entity.apply("DOC.17.0", "Necropsy section found", "n-a")); + end + + +rule "DOC.18.0: Clinical observations" + when + FileAttribute(label == "OECD Number", value == "403") + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Clinical Observations") + || anyHeadlineContainsStringIgnoreCase("Clinical observations") + || anyHeadlineContainsStringIgnoreCase("In-life Observations") + || anyHeadlineContainsStringIgnoreCase("Postmortem Observations") + ) + && !anyHeadlineContainsStringIgnoreCase("Appendix") + && !anyHeadlineContainsStringIgnoreCase("Table") + && !anyHeadlineContainsStringIgnoreCase("Mortality") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.18.0", "Clinical observations section found", "n-a")); + end + + +rule "DOC.19.0: Bodyweight changes" + when + FileAttribute(label == "OECD Number", value == "403") + $headline: Headline(containsAnyStringIgnoreCase("Bodyweight", "Bodyweights", "Body Weights", "Body Weight"), !containsAnyStringIgnoreCase("Appendix", "TABLE")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "bodyweight_changes", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.19.0", "Bodyweight section found", "n-a")); + end + + +rule "DOC.20.0: Study Design" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) + $section: Section( + anyHeadlineContainsStringIgnoreCase("study design") + && !anyHeadlineContainsString("Preliminary screening test") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.20.0", "Study design section found", "n-a")); + end + + +rule "DOC.20.1: Study Design" + when + Headline(containsStringIgnoreCase("Study Design"), $sectionIdentifier: getSectionIdentifier()) + $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "study_design", EntityType.ENTITY) + .forEach(entity -> { + entity.apply("DOC.20.1", "Study design section found", "n-a"); + }); + end + + +rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) + $parentHeadline: Headline( + containsAnyString("Results", "Conclusion"), + !containsAnyString("POSITIVE CONTROL", "Positive Control", "Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), + $sectionIdentifier: getSectionIdentifier() + ) + not Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) + then + entityCreationService.bySemanticNodeParagraphsOnly($parentHeadline.getParent(), "results_and_conclusion", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.21.0", "Results and Conclusion found", "n-a")); + end + + +rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) + Headline( + containsAnyString("Results", "Conclusion"), + !containsAnyString("POSITIVE CONTROL", "Positive Control", "Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), + $sectionIdentifier: getSectionIdentifier() + ) + $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "results_and_conclusion", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.21.1", "Results and Conclusion found", "n-a")); + end + + +rule "DOC.22.0: Detailing (404 & 405)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) + $section: Section( + anyHeadlineContainsStringIgnoreCase("Results") + && !getHeadline().containsStringIgnoreCase("Evaluation") + && !getHeadline().containsStringIgnoreCase("study") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.22.0", "Detailing found", "n-a")); + end + + +rule "DOC.23.0: Preliminary Test Results (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + ((anyHeadlineContainsString("Preliminary Screening Test") && containsString("Clinical observations")) + || anyHeadlineContainsString("Pre-Experiment")) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.23.0", "Preliminary Test Results found", "n-a")); + end + + +rule "DOC.24.0: Test Results (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.24.0", "Test Results found", "n-a")); + end + + +rule "DOC.24.1: Test Results (429)" + when + Headline(containsStringIgnoreCase("RESULTS AND DISCUSSION"), $sectionIdentifierResultsAndDiscussion: getSectionIdentifier()) + $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifierResultsAndDiscussion)) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "test_results", EntityType.ENTITY) + .forEach(entity -> { + entity.apply("DOC.24.1", "Test Results found", "n-a"); + }); + end + + +rule "DOC.25.0: Approach used (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + hasEntitiesOfType("species") + && (containsStringIgnoreCase("animals per") || containsStringIgnoreCase("animals /")) + ) + then + entityCreationService.byRegexIgnoreCase("\\banimals (?:per|\\/) .{0,15}(group)\\b", "approach_used", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.25.0", "Study animal approach found.", "n-a"); + }); + end + + +rule "DOC.26.0: Sex" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) $section: Section( ( anyHeadlineContainsStringIgnoreCase("animal") - || anyHeadlineContainsStringIgnoreCase("Species and strain") || anyHeadlineContainsStringIgnoreCase("test system") ) && !getHeadline().containsStringIgnoreCase("selection") @@ -357,231 +880,195 @@ rule "DOC.35.0: Sex" ) then entityCreationService.byRegexIgnoreCase("([S|s]ex:)?[\\w\\s]{0,10}\\b(males?|females?)\\b", "sex", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.35.0", "Test animal sex found", "n-a"); + entity.apply("DOC.26.0", "Test animal sex found", "n-a"); }); end - -rule "DOC.6.0: Authors" +rule "DOC.27.0: Animal Number 405" when - $headline: Headline(onPage(1), containsString("AUTHOR")) + FileAttribute(label == "OECD Number", value == "405") + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("test system") + || anyHeadlineContainsStringIgnoreCase("reaction") + ) + && !getHeadline().containsString("selection") + && ( + containsStringIgnoreCase("number of animals") + || containsStringIgnoreCase("no.") + ) + ) then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "author", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.6.0", "Author found", "n-a")); + entityCreationService.byRegexIgnoreCase("(Number of animals:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.27.0", "Number of animals found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("(?:.{1,10} No\\. )([\\d\\w\\-]{3,8})", "animal_numbers", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.27.0", "Number of animals found", "n-a"); + }); end - -rule "DOC.6.2: Authors" +rule "DOC.28.0: Animal Number 429" when - $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "AUTHOR(S)" || contains "AUTHORS" || contains "Author"), getMainBodyTextBlock().getSearchText() (contains "STUDY COMPLETED ON" || contains "STUDY COMPLETION DATE" || contains "DATE OF INTERIM REPORT" || contains "Report completion date" || contains "Date of Report" || contains "AMENDMENT COMPLETION DATE")) + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("test system") + ) + && !getHeadline().containsString("selection") + && containsStringIgnoreCase("number of animals") + && (containsStringIgnoreCase("per") || containsString("/")) + && containsStringIgnoreCase("group") + ) then - - List startBoundaries = new LinkedList<>(); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("AUTHOR(S)", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("AUTHORS", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Author", $page.getMainBodyTextBlock())); - - List stopBoundaries = new LinkedList<>(); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("STUDY COMPLETED ON", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("STUDY COMPLETION DATE", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("DATE OF INTERIM REPORT", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Report completion date", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Date of Report", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("AMENDMENT COMPLETION DATE", $page.getMainBodyTextBlock())); - - entityCreationService.betweenTextRanges(startBoundaries, stopBoundaries, "author", EntityType.ENTITY, document).forEach(entity -> { - entity.apply("DOC.6.2", "Author found", "n-a"); + entityCreationService.byRegexIgnoreCase("(Number of animals per group:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); }); + entityCreationService.byRegexIgnoreCase("(Number of animals per group:).{0,60}\\b([\\d]{1,3})\\sper group\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1 , $section).forEach(entity -> { + entity.apply("DOC.28.0", "Number of animals in group found", "n-a"); + }); + end + + +rule "DOC.28.1: No. Of animals - Fallback to appendix tables listing all individual animals for 429" + when + $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number") + $table: Table(containsString($keyword) && getHeadline().containsString("TABLE") && getHeadline().containsString("Individual")) + FileAttribute(label == "OECD Number", value == "429") + then + $table.streamTableCellsWithHeader($keyword) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "animal_numbers", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> { + entity.apply("DOC.28.1", "Animal number found.", "n-a"); + insert(entity); + }); end - - -rule "DOC.6.6: laboratory_project_identification" +rule "DOC.29.0: 4h Exposure" when - $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "LABORATORY PROJECT IDENTIFICATION" || contains "TEST FACILITY PROJECT IDENTIFICATION" || contains "Laboratory Project Identification")) + FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436")) + $section: Section( + (containsStringIgnoreCase("4 hours") || containsStringIgnoreCase("four hours")) + ) then - List startBoundaries = new LinkedList<>(); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("LABORATORY PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("TEST FACILITY PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); - - List stopBoundaries = new LinkedList<>(); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("SPONSOR", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("VOLUME", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("This", $page.getMainBodyTextBlock())); - - entityCreationService.betweenTextRanges(startBoundaries, stopBoundaries, "laboratory_project_identification", EntityType.ENTITY, document).forEach(entity -> { - entity.apply("DOC.6.6", "Laboratory Project Identification", "n-a"); - }); - end - - - -rule "DOC.7.2: study title by document structure" - when - $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "STUDY TITLE" || contains "Study Title" || contains "STUDYTITLE" || contains "Report (Final)")) - then - - List startBoundaries = new LinkedList<>(); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("STUDY TITLE", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("STUDYTITLE", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Report (Final)", $page.getMainBodyTextBlock())); - - List stopBoundaries = new LinkedList<>(); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("TEST GUIDELINES", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("TEST GUIDELINE(S)", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Guidelines", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("DATA REQUIREMENT", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("AUTHOR(S)", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("AUTHOR", $page.getMainBodyTextBlock())); - - entityCreationService.betweenTextRanges(startBoundaries, stopBoundaries, "title", EntityType.ENTITY, document).forEach(entity -> { - entity.apply("DOC.7.2", "Study title found", "n-a"); - }); - end - - - -rule "DOC.8.1: Performing Laboratory" - when - $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "PERFORMING LABORATORY" || contains "TEST FACILITIES" || contains "TEST FACILITY" || contains "Test Facility"), getMainBodyTextBlock().getSearchText() (contains "LABORATORY PROJECT IDENTIFICATION" || contains "TEST FACILITY PROJECT IDENTIFICATION" || contains "Sponsor")) - then - List startBoundaries = new LinkedList<>(); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("PERFORMING LABORATORY", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("TEST FACILITIES", $page.getMainBodyTextBlock())); - startBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("TEST FACILITY", $page.getMainBodyTextBlock())); - - List stopBoundaries = new LinkedList<>(); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("LABORATORY PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("TEST FACILITY PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("Sponsor", $page.getMainBodyTextBlock())); - stopBoundaries.addAll(RedactionSearchUtility.findTextRangesByStringIgnoreCase("PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); - - entityCreationService.betweenTextRanges(startBoundaries, stopBoundaries, "laboratory_name", EntityType.ENTITY, document).forEach(entity -> { - entity.apply("DOC.8.1", "Performing Laboratory found", "n-a"); - }); - end - - - - -rule "DOC.8.2: Summary Methods" - when - $headline: Headline(containsString("1.1. METHODS")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_methods", EntityType.ENTITY) - .filter(e -> !e.getValue().contains("Report; Project No")) - .filter(e -> !e.getValue().startsWith("This document")) - .filter(e -> !e.getValue().startsWith("Page")) - .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) - .filter(e -> !e.getValue().startsWith("BASF")) - .filter(e -> !e.getValue().startsWith("The Chemical Company")) - .filter(e -> !e.getValue().startsWith("We create chemistry")) - .forEach(entity -> entity.apply("DOC.8.2", "Summary Methods found", "n-a")); - end - -rule "DOC.8.3: Summary Observations Laboratory" - when - $headline: Headline(containsString("1.2. OBSERVATIONS")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_observations", EntityType.ENTITY) - .filter(e -> !e.getValue().contains("Report; Project No")) - .filter(e -> !e.getValue().startsWith("This document")) - .filter(e -> !e.getValue().startsWith("Page")) - .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) - .filter(e -> !e.getValue().startsWith("BASF")) - .filter(e -> !e.getValue().startsWith("The Chemical Company")) - .filter(e -> !e.getValue().startsWith("We create chemistry")) - .forEach(entity -> entity.apply("DOC.8.3", "Summary Observations found", "n-a")); - end - - -rule "DOC.8.5: Summary Results" - when - Headline((containsStringIgnoreCase("1.3. RESULTS") || containsStringIgnoreCase("1.2. RESULTS")), $sectionIdentifier: getSectionIdentifier()) - $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_results", EntityType.ENTITY) - .filter(e -> !e.getValue().contains("Report; Project No")) - .filter(e -> !e.getValue().startsWith("This document")) - .filter(e -> !e.getValue().startsWith("Page")) - .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) - .filter(e -> !e.getValue().startsWith("BASF")) - .filter(e -> !e.getValue().startsWith("The Chemical Company")) - .filter(e -> !e.getValue().startsWith("We create chemistry")) - .forEach(entity -> { - entity.apply("DOC.8.5", "Summary Results", "n-a"); - }); - end - -rule "DOC.8.6: Summary Results 2" - when - $headline: Headline(containsString("1.2. RESULTS")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_results", EntityType.ENTITY) - .filter(e -> !e.getValue().contains("Report; Project No")) - .filter(e -> !e.getValue().startsWith("This document")) - .filter(e -> !e.getValue().startsWith("Page")) - .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) - .filter(e -> !e.getValue().startsWith("BASF")) - .filter(e -> !e.getValue().startsWith("The Chemical Company")) - .filter(e -> !e.getValue().startsWith("We create chemistry")) - .forEach(entity -> entity.apply("DOC.8.6", "Summary Results", "n-a")); - end - - - -rule "DOC.8.4: Summary Conclusion" - when - $headline: Headline(containsString("1.4. CONCLUSION") || containsString("1.3. CONCLUSION")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "study_conclusion", EntityType.ENTITY) - .filter(e -> !e.getValue().contains("Report; Project No")) - .filter(e -> !e.getValue().startsWith("This document")) - .filter(e -> !e.getValue().startsWith("Page")) - .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) - .filter(e -> !e.getValue().startsWith("BASF")) - .filter(e -> !e.getValue().startsWith("The Chemical Company")) - .filter(e -> !e.getValue().startsWith("We create chemistry")) - .forEach(entity -> entity.apply("DOC.8.4", "Summary Conculsion found", "n-a")); - end - - - -rule "DOC.9.0: GLP Study" - when - $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") - || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") - || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) - || containsString("GLP Certificate") - || containsString("GLP Certificates") - || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") - || containsString("Good Laboratory Practice Certificate") - || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION") - || containsString("GLP CERTIFICATE (FROM THE COMPETENT AUTHORITY)") - || containsString("GLP COMPLIANCE STATEMENT") - || containsString("GLP STATEMENT") - ) - then - entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { - entity.apply("DOC.9.0", "GLP Study found", "n-a"); + entityCreationService.byRegexIgnoreCase("((?<=\\.\\s\\b).{1,100}(4|four) hours.*?\\.) ", "4h_exposure", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.apply("DOC.29.0", "4h exposure sentence found", "n-a"); }); end - -rule "DOC.9.1: GLP Study" +rule "DOC.30.0: Dilution of the test substance" when - $paragraph: Paragraph(containsString("GLP COMPLIANCE STATEMENT")) + FileAttribute(label == "OECD Number", value == "404") + $section: Section( + getHeadline().containsString("Formulation") + && containsString("dilution") + ) then - entityCreationService.byRegex("GLP COMPLIANCE STATEMENT", "glp_study", EntityType.ENTITY, $paragraph).forEach(entity -> { - entity.apply("DOC.9.1", "GLP Study found", "n-a"); - }); + entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.30.0", "Dilution found.", "n-a")); end +rule "DOC.31.0: Positive Control" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + getHeadline().containsStringIgnoreCase("Positive Control") + && !(getHeadline().containsStringIgnoreCase("Appendix") || getHeadline().containsStringIgnoreCase("Table")) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.31.0", "Positive control found.", "n-a")); + end + + +rule "DOC.32.0: Mortality Statement" + when + FileAttribute(label == "OECD Number", value == "402") + $headline: Headline(containsStringIgnoreCase("Mortality") && !containsString("TABLE")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.32.0", "Mortality Statement found", "n-a")); + end + + +rule "DOC.33.0: Dose Mortality" + when + FileAttribute(label == "OECD Number", value == "425") + $table: Table( + (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("LongTerm Outcome") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality") || hasHeader("Viability/Mortality")) + && + (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]")) + ) + then + Stream.of($table.streamTableCellsWithHeader("Mortality"), + $table.streamTableCellsWithHeader("Comments"), + $table.streamTableCellsWithHeader("Long Term Results"), + $table.streamTableCellsWithHeader("Long Term Outcome"), + $table.streamTableCellsWithHeader("LongTerm Outcome"), + $table.streamTableCellsWithHeader("Viability / Mortality"), + $table.streamTableCellsWithHeader("Viability/Mortality") + ).flatMap(a -> a) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> { + entity.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"); + insert(entity); + }); + + Stream.of($table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"), + $table.streamTableCellsWithHeader("Dose [mg/kg body weight]"), + $table.streamTableCellsWithHeader("Dose levei (mg/kg)"), + $table.streamTableCellsWithHeader("Dose Level (mg/kg)"), + $table.streamTableCellsWithHeader("Dose level (mg/kg)"), + $table.streamTableCellsWithHeader("Dose (mg/kg)"), + $table.streamTableCellsWithHeader("Dosage [mg/kg body weight]") + ).flatMap(a -> a) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(entity -> { + entity.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"); + insert(entity); + }); + end + + +rule "DOC.34.0: Results (Main Study)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + getHeadline().containsString("Results") + && getHeadline().getTextRange().length() < 20 + && !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table")) + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.34.0", "Results for main study found.", "n-a")); + end + + +rule "DOC.35.0: Doses (mg/kg bodyweight)" + when + FileAttribute(label == "OECD Number", value == "402") + $section: Section( + anyHeadlineContainsStringIgnoreCase("study design") + ) + then + entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.35.0", "Doses per bodyweight information found", "n-a")); + end //------------------------------------ Manual redaction rules ------------------------------------ @@ -733,7 +1220,7 @@ rule "X.0.0: remove Entity contained by Entity of same type" salience 65 when $larger: TextEntity($type: type, $entityType: entityType, active()) - $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized(), active()) + $contained: TextEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !hasManualChanges(), active()) then $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); retract($contained); @@ -745,7 +1232,7 @@ rule "X.2.0: remove Entity of type ENTITY when contained by FALSE_POSITIVE" salience 64 when $falsePositive: TextEntity($type: type, entityType == EntityType.FALSE_POSITIVE, active()) - $entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !resized(), active()) + $entity: TextEntity(containedBy($falsePositive), type == $type, entityType == EntityType.ENTITY, !hasManualChanges(), active()) then $entity.getIntersectingNodes().forEach(node -> update(node)); $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); @@ -758,7 +1245,7 @@ rule "X.3.0: remove Entity of type RECOMMENDATION when contained by FALSE_RECOMM salience 64 when $falseRecommendation: TextEntity($type: type, entityType == EntityType.FALSE_RECOMMENDATION, active()) - $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) then $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); retract($recommendation); @@ -770,7 +1257,7 @@ rule "X.4.0: remove Entity of type RECOMMENDATION when intersected by ENTITY wit salience 256 when $entity: TextEntity($type: type, entityType == EntityType.ENTITY, active()) - $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !resized(), active()) + $recommendation: TextEntity(intersects($entity), type == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) then $entity.addEngines($recommendation.getEngines()); $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY with same type"); @@ -783,7 +1270,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" salience 256 when $entity: TextEntity(entityType == EntityType.ENTITY, active()) - $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !resized(), active()) + $recommendation: TextEntity(containedBy($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges(), active()) then $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when contained by ENTITY"); retract($recommendation); @@ -794,7 +1281,7 @@ rule "X.5.0: remove Entity of type RECOMMENDATION when contained by ENTITY" rule "X.7.0: remove all images" salience 512 when - $image: Image(imageType != ImageType.OCR) + $image: Image(imageType != ImageType.OCR, !hasManualChanges()) then $image.remove("X.7.0", "remove all images"); retract($image); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl index 02bd91af..8080ea62 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora_components.drl @@ -47,7 +47,7 @@ declare GuidelineMapping //------------------------------------ Default Components rules ------------------------------------ -rule "StudyTitle.0.0: Study Title" +rule "StudyTitle.0.0: First Title found" when $titleCandidates: List() from collect (Entity(type == "title")) then @@ -55,7 +55,7 @@ rule "StudyTitle.0.0: Study Title" end -rule "PerformingLaboratory.1.0: Performing Laboratory" +rule "PerformingLaboratory.1.0: Performing Laboratory name and country found in same section" when $laboratoryName: Entity(type == "laboratory_name", $node: containingNode) $laboratoryCountry: Entity(type == "laboratory_country", containingNode == $node) @@ -64,7 +64,7 @@ rule "PerformingLaboratory.1.0: Performing Laboratory" componentCreationService.create("PerformingLaboratory.1.0", "Performing_Laboratory", $laboratoryName.getValue() + ", " + $laboratoryCountry.getValue(), "Laboratory name and country found!", List.of($laboratoryName, $laboratoryCountry)); end -rule "PerformingLaboratory.2.0: Performing Laboratory" +rule "PerformingLaboratory.2.0: Performing Laboratory name but no country found in same section" when $laboratoryName: Entity(type == "laboratory_name", $node: containingNode) not Entity(type == "laboratory_country", containingNode == $node) @@ -72,7 +72,7 @@ rule "PerformingLaboratory.2.0: Performing Laboratory" componentCreationService.create("PerformingLaboratory.2.0", "Performing_Laboratory", $laboratoryName.getValue(), "Only laboratory name found!", List.of($laboratoryName)); end -rule "PerformingLaboratory.0.2: Performing Laboratory" +rule "PerformingLaboratory.0.2: Performing Laboratory not found" salience -1 when not Component(name == "Performing_Laboratory") @@ -81,30 +81,30 @@ rule "PerformingLaboratory.0.2: Performing Laboratory" end -rule "ReportNumber.0.0: Report number" +rule "ReportNumber.0.0: First Report number found" when $reportNumberCandidates: List() from collect (Entity(type == "report_number")) then - componentCreationService.firstOrElse("ReportNumber.0.0", "Report_number", $reportNumberCandidates, ""); + componentCreationService.firstOrElse("ReportNumber.0.0", "Report_Number", $reportNumberCandidates, ""); end -rule "GLPStudy.0.0: GLP Study" +rule "GLPStudy.0.0: GLP Study found" when $glpStudyList: List(!isEmpty) from collect(Entity(type == "glp_study")) then - componentCreationService.create("GLPStudy.0.0", "GLP_study", "Yes", "Yes if present, No if not", $glpStudyList); + componentCreationService.create("GLPStudy.0.0", "GLP_Study", "Yes", "Yes if present, No if not", $glpStudyList); end -rule "GLPStudy.1.0: GLP Study" +rule "GLPStudy.1.0: GLP Study not found" when not Entity(type == "glp_study") then - componentCreationService.create("GLPStudy.1.0", "GLP_study", "No", "Yes if present, No if not"); + componentCreationService.create("GLPStudy.1.0", "GLP_Study", "No", "Yes if present, No if not"); end -rule "TestGuideline.0.0: create mappings" +rule "TestGuideline.0.0: create OECD number and year guideline mappings" salience 2 when Entity(type == "oecd_guideline_number") @@ -141,7 +141,7 @@ rule "TestGuideline.0.0: create mappings" insert(new GuidelineMapping("487", "2016", "Nº 487: Micronucleus Human Lymphocytes (2016)")); end -rule "TestGuideline.0.1: match test guidelines with mappings" +rule "TestGuideline.0.1: match OECD number and year with guideline mappings" salience 1 when GuidelineMapping($year: year, $number: number, $guideline: guideline) @@ -157,7 +157,7 @@ rule "TestGuideline.0.1: match test guidelines with mappings" ); end -rule "TestGuideline.1.0: no mapping found" +rule "TestGuideline.1.0: no guideline mapping found" when not Component(name == "Test_Guidelines_1") $guideLine: Entity(type == "oecd_guideline") @@ -165,23 +165,15 @@ rule "TestGuideline.1.0: no mapping found" componentCreationService.create("TestGuideline.2.0", "Test_Guidelines_1", $guideLine.getValue(), "No Mapping for OECD number and year found, using fallback instead!", List.of($guideLine)); end -rule "TestGuideline.2.0: Test Guideline 2" +rule "TestGuideline.2.0: All values of EPA guideline and EC guidelines" when - $epaGuideLines: List() from collect (Entity(type == "epa_guideline")) - $ecGuideLines: List() from collect (Entity(type == "ec_guideline")) + $guidelines: List() from collect (Entity(type == "epa_guideline" || type == "ec_guideline")) then - componentCreationService.joining("TestGuideline.2.0", - "Test_Guideline_2", - Stream.of( - $epaGuideLines.stream(), - $ecGuideLines.stream()) - .flatMap(a -> a) - .toList() - ); + componentCreationService.joining("TestGuideline.2.0", "Test_Guidelines_2", $guidelines); end -rule "StartDate.0.0: Experimental Starting Date" +rule "StartDate.0.0: All experimental start dates converted to dd/MM/yyyy" when $startDates: List(!isEmpty()) from collect (Entity(type == "experimental_start_date")) then @@ -189,7 +181,7 @@ rule "StartDate.0.0: Experimental Starting Date" end -rule "CompletionDate.0.0: Experimental Completion Date" +rule "CompletionDate.0.0: All experimental end dates converted to dd/MM/yyyy" when $endDates: List(!isEmpty()) from collect (Entity(type == "experimental_end_date")) then @@ -197,26 +189,26 @@ rule "CompletionDate.0.0: Experimental Completion Date" end -rule "AnalysisCertificate.0.0: Certificate of analysis batch identification" +rule "AnalysisCertificate.0.0: Unique values of certificate of analysis batch identification" when $batchNumbers: List(!isEmpty()) from collect (Entity(type == "batch_number")) then - componentCreationService.joiningUnique("AnalysisCertificate.0.0", "Batch_Number", $batchNumbers); + componentCreationService.joiningUnique("AnalysisCertificate.0.0", "Certificate_of_Analysis_Batch_Identification", $batchNumbers); end rule "StudyConclusion.0.0: Study conclusion in first found section" when $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $studyConclusions: List() from collect(Entity(type == "study_conclusion")) then - componentCreationService.joiningUniqueFromFirstSectionOnly("Study_Conclusion.0.0", "Study_Conclusion", $studyConclusions); + componentCreationService.joiningFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $studyConclusions, " "); end rule "GuidelineDeviation.0.0: Guideline deviation as sentences" when $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $guidelineDeviations: List() from collect (Entity(type == "guideline_deviation")) then componentCreationService.asSentences("GuidelineDeviation.0.0", "Deviation_from_the_Guideline", $guidelineDeviations); @@ -225,7 +217,7 @@ rule "GuidelineDeviation.0.0: Guideline deviation as sentences" rule "Species.0.0: First found species" when $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $species: List() from collect (Entity(type == "species")) then componentCreationService.firstOrElse("Species.0.0", "Species", $species, ""); @@ -234,7 +226,7 @@ rule "Species.0.0: First found species" rule "Strain.0.0: First found strain" when $oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $strain: List() from collect (Entity(type == "strain")) then componentCreationService.firstOrElse("Strain.0.0", "Strain", $strain, ""); @@ -243,34 +235,34 @@ rule "Strain.0.0: First found strain" rule "Conclusion.0.0: Unique values of Conclusion LD50" when $oecdNumber: String() from List.of("402", "403", "425", "436") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $conclusions: List() from collect (Entity(type == "ld50_value")) then - componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $conclusions, ""); + componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $conclusions); end rule "Conclusion0.1.0: Greater than found" when $oecdNumber: String() from List.of("402", "403", "425", "436") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $conclusions: List(!isEmpty()) from collect (Entity(type == "ld50_greater")) then - componentCreationService.create("Conclusion.1.0", "Conclusion_LD50_Greater_than", "Greater Than", "\"Greater than\" value found", $conclusions); + componentCreationService.create("Conclusion.1.0", "Conclusion_LD50_Greater_than", "Greater than", "Entity of type 'ld50_greater' found", $conclusions); end rule "Conclusion.1.1: Greater than not found" when $oecdNumber: String() from List.of("402", "403", "425", "436") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) not Entity(type == "ld50_greater") then - componentCreationService.create("Conclusion.1.1", "Conclusion_LD50_Greater_than", "", "No \"Greater than\" value found"); + componentCreationService.create("Conclusion.1.1", "Conclusion_LD50_Greater_than", "", "No entity of type 'ld50_greater' found"); end rule "Conclusion.2.0: Minimum confidence as unique values" when $oecdNumber: String() from List.of("402", "403", "425", "436") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $conclusions: List() from collect (Entity(type == "confidence_minimal")) then componentCreationService.joiningUnique("Conclusion.2.0", "Conclusion_Minimum_Confidence", $conclusions); @@ -279,7 +271,7 @@ rule "Conclusion.2.0: Minimum confidence as unique values" rule "Conclusion.3.0: Maximum confidence as unique values" when $oecdNumber: String() from List.of("402", "403", "425", "436") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $conclusions: List() from collect (Entity(type == "confidence_maximal")) then componentCreationService.joiningUnique("Conclusion.3.0", "Conclusion_Maximum_Confidence", $conclusions); @@ -287,15 +279,23 @@ rule "Conclusion.3.0: Maximum confidence as unique values" rule "Necropsy.0.0: Necropsy findings from longest section" when - FileAttribute(label == "oecd_number", value == "402") + FileAttribute(label == "OECD Number", value == "402") $necropsies: List() from collect (Entity(type == "necropsy_findings")) then - componentCreationService.joiningFromLongestSectionOnly("Necropsy.0.0", "Necropsy_Findings", $necropsies); + componentCreationService.joiningFromLongestSectionOnly("Necropsy.0.0", "Necropsy_Findings", $necropsies, " "); + end + +rule "Necropsy.0.1: Necropsy findings from longest section" + when + FileAttribute(label == "OECD Number", value == "403" || value == "436") + $necropsies: List() from collect (Entity(type == "necropsy_findings")) + then + componentCreationService.asSentences("Necropsy.0.0", "Necropsy_Findings", $necropsies); end rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block" when - FileAttribute(label == "oecd_number", value == "402") + FileAttribute(label == "OECD Number", value == "402") $dosages: List() from collect (Entity(type == "doses_(mg_kg_bw)")) then componentCreationService.joining("Necropsy.1.0", "Doses_mg_per_kg_bw", $dosages, " "); @@ -304,7 +304,7 @@ rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block" rule "Necropsy.2.0: Necropsy findings as one block" when $oecdNumber: String() from List.of("403", "436") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $necropsies: List() from collect (Entity(type == "necropsy_findings")) then componentCreationService.joining("Necropsy.2.0", "Necropsy_Findings", $necropsies, " "); @@ -313,7 +313,7 @@ rule "Necropsy.2.0: Necropsy findings as one block" rule "Necropsy.3.0: Conducted with 4 hours of exposure as one block" when $oecdNumber: String() from List.of("403", "436") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $exposures: List() from collect (Entity(type == "4h_exposure")) then componentCreationService.joining("Necropsy.3.0", "Conducted_with_4_Hours_of_Exposure", $exposures, " "); @@ -322,7 +322,7 @@ rule "Necropsy.3.0: Conducted with 4 hours of exposure as one block" rule "StudyDesign.0.0: Study design as one block" when $oecdNumber: String() from List.of("404", "405", "429", "406", "428", "438", "439", "474", "487") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $studyDesigns: List() from collect (Entity(type == "study_design")) then componentCreationService.joining("StudyDesign.0.0", "Study_Design", $studyDesigns, " "); @@ -331,7 +331,7 @@ rule "StudyDesign.0.0: Study design as one block" rule "Results.0.0: Results and conclusions as joined values" when $oecdNumber: String() from List.of("406", "428", "438", "439", "474", "487") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $results: List() from collect (Entity(type == "results_and_conclusion")) then componentCreationService.joining("Results.0.0", "Results_and_Conclusions", $results, " "); @@ -339,7 +339,7 @@ rule "Results.0.0: Results and conclusions as joined values" rule "WeightBehavior.0.0: Weight change behavior as sentences" when - FileAttribute(label == "oecd_number", value == "402") + FileAttribute(label == "OECD Number", value == "402") $weightChanges: List() from collect (Entity(type == "weight_behavior_changes")) then componentCreationService.asSentences("WeightBehavior.0.0", "Weight_Behavior_Changes", $weightChanges); @@ -347,7 +347,7 @@ rule "WeightBehavior.0.0: Weight change behavior as sentences" rule "MortalityStatement.0.0: Mortality statements as one block" when - FileAttribute(label == "oecd_number", value == "402") + FileAttribute(label == "OECD Number", value == "402") $mortalityStatements: List() from collect (Entity(type == "mortality_statement")) then componentCreationService.joining("MortalityStatement.0.0", "Mortality_Statement", $mortalityStatements, " "); @@ -355,7 +355,7 @@ rule "MortalityStatement.0.0: Mortality statements as one block" rule "ClinicalObservations.0.0: Clinical observations as sentences" when - FileAttribute(label == "oecd_number", value == "403") + FileAttribute(label == "OECD Number", value == "403") $observations: List() from collect (Entity(type == "clinical_observations")) then componentCreationService.asSentences("MortalityStatement.0.0", "Clinical_Observations", $observations); @@ -363,7 +363,7 @@ rule "ClinicalObservations.0.0: Clinical observations as sentences" rule "BodyWeight.0.0: Bodyweight changes as sentences" when - FileAttribute(label == "oecd_number", value == "403") + FileAttribute(label == "OECD Number", value == "403") $weightChanges: List() from collect (Entity(type == "bodyweight_changes")) then componentCreationService.asSentences("BodyWeight.0.0", "Body_Weight_Changes", $weightChanges); @@ -372,7 +372,7 @@ rule "BodyWeight.0.0: Bodyweight changes as sentences" rule "Detailing.0.0: Detailing of reported changes as one block" when $oecdNumber: String() from List.of("404", "405") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $detailings: List() from collect (Entity(type == "detailing")) then componentCreationService.joining("Detailing.0.0", "Detailing_of_Reported_Changes", $detailings, " "); @@ -381,8 +381,8 @@ rule "Detailing.0.0: Detailing of reported changes as one block" rule "Sex.0.0: Male sex found" when $oecdNumber: String() from List.of("405", "429") - FileAttribute(label == "oecd_number", value == $oecdNumber) - $males: List(!isEmpty) from collect (Entity(type == "sex", (value == "male" || value == "males"))) + FileAttribute(label == "OECD Number", value == $oecdNumber) + $males: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "male" || value.toLowerCase() == "males"))) then componentCreationService.create("Sex.0.0", "Sex", "male", "male sex found", $males); end @@ -390,8 +390,8 @@ rule "Sex.0.0: Male sex found" rule "Sex.1.0: Female sex found" when $oecdNumber: String() from List.of("405", "429") - FileAttribute(label == "oecd_number", value == $oecdNumber) - $females: List(!isEmpty) from collect (Entity(type == "sex", (value == "female" || value == "females"))) + FileAttribute(label == "OECD Number", value == $oecdNumber) + $females: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "female" || value.toLowerCase() == "females"))) then componentCreationService.create("Sex.0.0", "Sex", "female", "female sex found", $females); end @@ -399,7 +399,7 @@ rule "Sex.1.0: Female sex found" rule "NumberOfAnimals.0.0: Number of animals found" when $oecdNumber: String() from List.of("405", "429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $numberOfAnimals: Entity(type == "number_of_animals") then componentCreationService.create("NumberOfAnimals.0.0", "Number_of_Animals", $numberOfAnimals.getValue(), "Number of animals found directly", $numberOfAnimals); @@ -408,7 +408,7 @@ rule "NumberOfAnimals.0.0: Number of animals found" rule "NumberOfAnimals.1.0: Count unique occurences of animals" when $oecdNumber: String() from List.of("405", "429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) not Entity(type == "number_of_animals") $animals: List() from collect (Entity(type == "animal_number")) then @@ -418,16 +418,16 @@ rule "NumberOfAnimals.1.0: Count unique occurences of animals" rule "ClinicalSigns.0.0: Clinical signs as sentences" when $oecdNumber: String() from List.of("425") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $clinicalSigns: List() from collect (Entity(type == "clinical_signs")) then componentCreationService.asSentences("ClinicalSigns.0.0", "Clinical_Signs", $clinicalSigns); end -rule "DoseMortality.0.0: Dose mortality as sentences" +rule "DoseMortality.0.0: Dose mortality joined with dose from same table row" when $oecdNumber: String() from List.of("425") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $doseMortalities: List() from collect (Entity(type == "dose_mortality" || type == "dose_mortality_dose")) then componentCreationService.joiningFromSameTableRow("DoseMortality.0.0", "Dose_Mortality", $doseMortalities); @@ -436,7 +436,7 @@ rule "DoseMortality.0.0: Dose mortality as sentences" rule "Mortality.0.0: Mortality as one block" when $oecdNumber: String() from List.of("425") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $mortalities: List() from collect (Entity(type == "mortality")) then componentCreationService.joining("Mortality.0.0", "Mortality", $mortalities, " "); @@ -445,8 +445,8 @@ rule "Mortality.0.0: Mortality as one block" rule "Dosages.0.0: First found value of Dosages" when $oecdNumber: String() from List.of("425") - FileAttribute(label == "oecd_number", value == $oecdNumber) - $mortalities: List() from collect (Entity(type == "mortality")) + FileAttribute(label == "OECD Number", value == $oecdNumber) + $mortalities: List() from collect (Entity(type == "dosages")) then componentCreationService.firstOrElse("Dosages.0.0", "Dosages", $mortalities, ""); end @@ -454,7 +454,7 @@ rule "Dosages.0.0: First found value of Dosages" rule "PrelimResults.0.0: Preliminary test results as sentences" when $oecdNumber: String() from List.of("429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $results: List() from collect (Entity(type == "preliminary_test_results")) then componentCreationService.asSentences("PrelimResults.0.0", "Preliminary_Test_Results", $results); @@ -463,7 +463,7 @@ rule "PrelimResults.0.0: Preliminary test results as sentences" rule "TestResults.0.0: Test results as one block" when $oecdNumber: String() from List.of("429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $results: List() from collect (Entity(type == "test_results")) then componentCreationService.joining("TestResults.0.0", "Test_Results", $results, " "); @@ -472,34 +472,34 @@ rule "TestResults.0.0: Test results as one block" rule "PositiveControl.0.0: Was the definitive study conducted with positive control" when $oecdNumber: String() from List.of("429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $results: List() from collect (Entity(type == "positive_control")) then componentCreationService.joining("PositiveControl.0.0", "Was_the_definitive_study_conducted_with_positive_control", $results, " "); end -rule "MainResults.0.0: Was the definitive study conducted with positive control" +rule "MainResults.0.0: Results from main study as one block" when $oecdNumber: String() from List.of("429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $results: List() from collect (Entity(type == "results_(main_study)")) then componentCreationService.joining("MainResults.0.0", "Results_Main_Study", $results, " "); end -rule "UsedApproach.0.0: Was the definitive study conducted with positive control" +rule "UsedApproach.0.0: Used approach found and mapped to 'Group'" when $oecdNumber: String() from List.of("429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) $results: List(!isEmpty()) from collect (Entity(type == "approach_used")) then componentCreationService.create("UsedApproach.0.0", "What_was_the_approach_used", "Group", "'Group' when approach used is present, else 'Individual'", $results); end -rule "UsedApproach.1.0: Was the definitive study conducted with positive control" +rule "UsedApproach.1.0: Used approach not found and thus 'Individual'" when $oecdNumber: String() from List.of("429") - FileAttribute(label == "oecd_number", value == $oecdNumber) + FileAttribute(label == "OECD Number", value == $oecdNumber) not Entity(type == "approach_used") then componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'"); @@ -513,11 +513,10 @@ rule "DefaultComponents.999.0: Create components for all unmapped entities." then componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $allEntities); end - */ //------------------------------------ Component merging rules ------------------------------------ - +/* rule "X.0.0: merge duplicate component references" when $first: Component() @@ -526,3 +525,4 @@ rule "X.0.0: merge duplicate component references" $first.getReferences().addAll($duplicate.getReferences()); retract($duplicate); end +*/ \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/402Study.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/402Study.pdf new file mode 100644 index 00000000..289b46cf Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/new/402Study.pdf differ