From 9dfb0f49c63f11dcd91ec493570b06c74c535861 Mon Sep 17 00:00:00 2001
From: Andrei Isvoran
Date: Tue, 5 Mar 2024 13:49:08 +0100
Subject: [PATCH] RED-8632 - Generate javadoc automatically

---
 .gitlab-ci.yml | 15 +
 .../build.gradle.kts | 49 +
 .../resources/drools/all_rules_documine.drl | 1613 +++++++++++++++++
 .../services/DroolsUpToDateTest.java | 15 +-
 4 files changed, 1690 insertions(+), 2 deletions(-)
 create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4f7f7901..47ad3fcc 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -21,3 +21,18 @@ deploy:
     - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
     - if: $CI_COMMIT_BRANCH =~ /^release/
     - if: $CI_COMMIT_TAG
+
+generateJavaDoc:
+  stage: build
+  tags:
+    - dind
+  script:
+    - echo "Generating Javadoc..."
+    - gradle generateJavaDoc -PjavadocDestinationDir="javadoc"
+  artifacts:
+    paths:
+      - redaction-service-v1/redaction-service-server-v1/javadoc/*
+  rules:
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+    - if: $CI_COMMIT_BRANCH =~ /^release/
+    - if: $CI_COMMIT_TAG
\ No newline at end of file
diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts
index 59bdcaf3..ec8ef756 100644
--- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts
+++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts
@@ -113,3 +113,52 @@ tasks.named("bootBuildImage") {
         tags.set(listOf(dockerTag))
     }
 }
+
+/**
+ * Collects the redaction-service classes that a Drools rule file imports explicitly and returns them as
+ * source-file paths (e.g. `com/iqser/red/service/redaction/v1/server/Foo.java`) usable as include patterns.
+ *
+ * Only single-class imports below `com.iqser.red.service.redaction.v1` are considered; static and wildcard
+ * imports as well as imports from other packages are skipped. The terminating semicolon is optional, because
+ * the rule file also contains imports written without one.
+ *
+ * @param droolsFilePath path of the `.drl` file to scan
+ * @return the distinct import paths in file order, each with `.` replaced by `/` and a `.java` suffix
+ */
+fun parseDroolsImports(droolsFilePath: String): List<String> {
+
+    // `[\w.]*\w` makes the captured name end on an identifier character, so a wildcard import can never match.
+    val importPattern = Regex("^import\\s+(com\\.iqser\\.red\\.service\\.redaction\\.v1\\.[\\w.]*\\w)\\s*(?:;|\$)")
+
+    return File(droolsFilePath).useLines { lines ->
+        lines.mapNotNull { line -> importPattern.find(line)?.groupValues?.get(1) }
+            .distinct()
+            .map { importPath -> importPath.replace('.', '/') + ".java" }
+            .toList()
+    }
+}
+
+// Resolved against the project directory (not the JVM working directory) and parsed only on first use.
+val droolsImports: List<String> by lazy {
+    parseDroolsImports(file("src/main/resources/drools/all_rules_documine.drl").path)
+}
+
+// Generates Javadoc for the delomboked sources of the classes imported by the Drools rule file.
+tasks.register("generateJavaDoc", Javadoc::class) {
+
+    dependsOn("compileJava")
+    dependsOn("delombok")
+    classpath = project.sourceSets["main"].runtimeClasspath
+    source = fileTree("${buildDir}/generated/sources/delombok/java/main") {
+        include(droolsImports)
+    }
+    // Without -PjavadocDestinationDir, write below the build directory instead of resolving an empty path.
+    destinationDir = file(project.findProperty("javadocDestinationDir")?.toString() ?: "${buildDir}/docs/javadoc")
+
+    options.memberLevel = JavadocMemberLevel.PUBLIC
+    (options as StandardJavadocDocletOptions).apply {
+        header = "Redaction Service ${project.version}"
+        footer = "Redaction Service ${project.version}"
+        title = "API Documentation for Redaction Service ${project.version}"
+    }
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl
new file mode 100644
index 00000000..e0cf99ae
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl
@@ -0,0 +1,1613 @@
+package drools
+
+import static java.lang.String.format;
+import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch;
+import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch;
+
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.Collection;
+import java.util.stream.Stream;
+import java.util.Optional;
+
+import com.iqser.red.service.redaction.v1.server.model.document.*;
+import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
+import 
com.iqser.red.service.redaction.v1.server.model.document.entity.*; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity +import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule +import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; +import 
com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; +import com.iqser.red.service.redaction.v1.server.model.NerEntities; +import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; +import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; + +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine; + +global Document document +global EntityCreationService entityCreationService +global ManualChangesApplicationService manualChangesApplicationService +global Dictionary dictionary + +//------------------------------------ queries ------------------------------------ + +query "getFileAttributes" + $fileAttribute: FileAttribute() + end + +//------------------------------------ H rules ------------------------------------ + +// Rule unit: H.0 +rule "H.0.0: retract table of contents page" + when + $page: 
Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) + $node: SemanticNode(onPage($page.getNumber()), !onPage($page.getNumber() -1), getType() != NodeType.IMAGE) + then + retract($node); + end + + +// Rule unit: H.1 +rule "H.1.0: Ignore Table of Contents" + salience 10 + when + $tocHeadline: Headline(containsString("CONTENTS")) + $page: Page() from $tocHeadline.getParent().getPages() + $node: SemanticNode(this != $tocHeadline, getType() != NodeType.IMAGE, onPage($page.getNumber()), !onPage($page.getNumber() -1)) + then + retract($node); + end + + +// Rule unit: H.2 +rule "H.2.0: Show headlines" + when + $headline: Headline() + then + entityCreationService.bySemanticNode($headline, "headline", EntityType.HINT); + end + + +// Rule unit: H.3 +rule "H.3.0: Study Type File Attribute" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $section: Section( + (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):")) + ,(containsString("OECD") || containsString("EPA") || containsString("OPPTS")) + ) + then + Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $section.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock())).flatMap(Collection::stream).findFirst() + .map(textRange -> $section.getTextBlock().subSequence(textRange).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .ifPresent(fileAttribute -> 
insert(fileAttribute)); + end + +rule "H.3.1: Study Type File Attribute in Headlines" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $page: Page($pageNumber:number, + getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") + || getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") + || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) + $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) + then + Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $headline.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $headline.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $headline.getTextBlock())).flatMap(Collection::stream).findFirst() + .map(textRange -> $headline.getTextBlock().subSequence(textRange).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .ifPresent(fileAttribute -> insert(fileAttribute)); + end + +//------------------------------------ General documine rules ------------------------------------ + +// Rule unit: DOC.1 +rule "DOC.1.0: Guidelines" + when + $section: Section( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + ) + then + entityCreationService.byRegex("OECD (No\\.? 
)?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? \\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline no. 
found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $section).forEach(entity -> + entity.apply("DOC.1.0", "OECD Guideline found", "n-a") + ); + end + +rule "DOC.1.2: Guidelines" + when + $section: Section( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + && ( + hasEntitiesOfType("oecd_guideline") + || hasEntitiesOfType("epa_guideline") + || hasEntitiesOfType("ec_guideline") + ) + ) + then + $section.getEntitiesOfType(List.of("oecd_guideline","ec_guideline", "epa_guideline")).forEach(entity -> { + entity.apply("DOC.1.2", "OECD guideline found.", "n-a"); + }); + 
end + +rule "DOC.1.3: Guidelines" + when + $section: Section( + ( + hasEntitiesOfType("oecd_guideline") + || hasEntitiesOfType("epa_guideline") + || hasEntitiesOfType("ec_guideline") + ) + && !( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + ) + ) + then + $section.getEntitiesOfType(List.of("oecd_guideline", "ec_guideline", "epa_guideline")).forEach(entity -> { + entity.remove("DOC.1.3", "removed"); + retract(entity); + }); + end + +rule "DOC.1.4: Guideline in Headlines" + when + $page: Page($pageNumber:number, + getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") + || getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") + || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) + $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) + then + entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? 
\\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline no. 
found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + end + +// Rule unit: DOC.2 +rule "DOC.2.0: Report number" + when + $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) + then + entityCreationService.lineAfterString("Report Number:", "report_number", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.2.0", "Report number found", "n-a"); + }); + end + + +// Rule unit: DOC.3 +rule "DOC.3.0: Experimental Starting Date" + when + $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II. 
Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental start date", + "Experimental start date:", + "Experimental Starting Date", + "Experimental Starting Date:", + "Experimental starting date", + "Experimental starting date:", + "Experimental Start Date", + "Experimental Start Date:", + "Experimental I. Starting Date:", + "Experimental II. Starting Date:"), "experimental_start_date", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.3.0", "Experimental start date found", "n-a"); + }); + end + + +// Rule unit: DOC.4 +rule "DOC.4.0: Experimental Completion Date" + when + $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental termination date", + "Experimental termination date:", + "Experimental Completion Date", + "Experimental Completion Date:", + "Experimental completion date", + "Experimental completion date:", + "Experimental Termination Date", + "Experimental Termination Date:"), "experimental_end_date", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.4.0", "Experimental end date found", "n-a"); + }); + end + + +// Rule unit: DOC.5 +rule "DOC.5.0: Ignore species and strain in irrelevant study types" + salience 1 + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) + $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) + then + $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { + entity.remove("DOC.5.0", "removed"); + retract(entity); + }); + end + +rule "DOC.5.1: Hide all skipped species and strains except in the relevant sections" + salience 1 + when + FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section( + (hasEntitiesOfType("species") || hasEntitiesOfType("strain")) + && !( + anyHeadlineContainsStringIgnoreCase("test system") + || anyHeadlineContainsStringIgnoreCase("animals") + || anyHeadlineContainsStringIgnoreCase("specification") + ) + ) + then + $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { + entity.remove("DOC.5.1", "removed"); + retract(entity); + }); + end + +rule "DOC.5.2: Species" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section(hasEntitiesOfType("species")) + then + $section.getEntitiesOfType("species").forEach(entity -> { + entity.apply("DOC.5.2", "Species found.", "n-a"); + entity.setValue(entity.getValue().toLowerCase()); + }); + end + +rule "DOC.5.3: Strain" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section( + hasEntitiesOfType("species") + && hasEntitiesOfType("strain") + && ( + anyHeadlineContainsStringIgnoreCase("test system") + || anyHeadlineContainsStringIgnoreCase("animals") + || anyHeadlineContainsStringIgnoreCase("specification") + ) + ) + then + $section.getEntitiesOfType("strain").forEach(entity -> { + entity.apply("DOC.5.3", "Strain found.", "n-a"); + }); + end + + +// Rule unit: DOC.6 +rule "DOC.6.0: study title by document structure" + when + $table: Table(onPage(1), + (containsString("Final Report") || containsString("SPL")), + numberOfRows == 1, + numberOfCols == 1) + $tableCell: TableCell(row == 1, col == 1) from $table.streamTableCells().toList() + $paragraph: Paragraph(previousSibling.isPresent(), nextSibling.isPresent()) from $tableCell.streamChildren().toList() + then + entityCreationService.bySemanticNode($paragraph, "title", EntityType.ENTITY).ifPresent(entity -> { + entity.apply("DOC.6.0", "Study title found", "n-a"); + }); + end + +rule "DOC.6.1: study title" + 
when + $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) + then + entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $table).findFirst().ifPresent(entity -> { + entity.apply("DOC.6.1", "Title found", "n-a"); + }); + end + +rule "DOC.6.2: study title" + when + not Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) + $section: Section(onPage(1), (containsString("Final Report") || containsString("SPL"))) + then + entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.6.2", "Title found", "n-a"); + }); + end + + +// Rule unit: DOC.7 +rule "DOC.7.0: Performing Laboratory (Name)" + when + $section: Section(containsString("PERFORMING LABORATORY:")) + then + entityCreationService.lineAfterString("PERFORMING LABORATORY:", "laboratory_name", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.apply("DOC.7.0", "Performing Laboratory found", "n-a"); + }); + end + +rule "DOC.7.1: Performing Laboratory (Country)" + when + nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) + $section: Section(containsString("PERFORMING LABORATORY:")) + then + nerEntities.streamEntitiesOfType("COUNTRY") + .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) + .map(nerEntity -> entityCreationService.optionalByNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) + .flatMap(Optional::stream) + .collect(Collectors.toList()) + .forEach(entity -> { + entity.apply("DOC.7.1", "Performing Laboratory found", "n-a"); + }); + end + +rule "DOC.7.2: Performing Laboratory (Country & Name) from dict" + when + $section: Section(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) + $countryOrNameFromDictionary: 
TextEntity(type() == "laboratory_country" || type() == "laboratory_name", $type: type, isDictionaryEntry()) from $section.getEntities() + then + $countryOrNameFromDictionary.apply("DOC.7.2", "Performing " + $type + " dictionary entry found."); + end + +rule "DOC.7.3: Performing Laboratory (Country) from dict" + when + $section: Section( + (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")) + && !(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) + ) + then + $section.getEntitiesOfType(List.of("laboratory_country", "laboratory_name")).forEach(entity -> { + entity.remove("DOC.7.3", "removed"); + retract(entity); + }); + end + + +// Rule unit: DOC.8 +rule "DOC.8.0: GLP Study" + when + $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") + || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") + || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) + || containsString("GLP Certificate") + || containsString("GLP Certificates") + || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") + || containsString("Good Laboratory Practice Certificate") + || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION")) + then + entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { + entity.apply("DOC.8.0", "GLP Study found", "n-a"); + }); + end + + +// Rule unit: DOC.9 +rule "DOC.9.0: Batch number from CoA" + when + $section: Section( + ( + anyHeadlineContainsString("Analytical Report") + || anyHeadlineContainsStringIgnoreCase("Certificate of Analysis") + || containsStringIgnoreCase("Certificate of Analysis") + ) + && ( + containsStringIgnoreCase("batch") + || containsStringIgnoreCase("bath") + || containsStringIgnoreCase("barch") + || containsStringIgnoreCase("bateb") + ) + && ( + containsStringIgnoreCase("identification") + || 
containsStringIgnoreCase("ldentitfication") + || containsStringIgnoreCase("wentification") + || containsStringIgnoreCase("mentification") + || containsStringIgnoreCase("kientification") + || containsStringIgnoreCase("reference number") + || containsStringIgnoreCase("test substance") + ) + ) + then + entityCreationService.lineAfterStrings(List.of("Batch Identification", + "(Batch Identification):", + "Bateb Identification", + "Batch Wentification", + "Batch Mentification", + "Batch Kientification", + "Barch Identification", + "Bath ldentitfication", + "Batch of test substance :"), "batch_number", EntityType.ENTITY, $section).forEach(entity -> { + entity.apply("DOC.9.0", "Batch number found in CoA", "n-a"); + }); + end + +rule "DOC.9.1: Batch number" + when + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Test Substance") + || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") + || anyHeadlineContainsStringIgnoreCase("Test Item") + ) + && !( + anyHeadlineContainsString("component") + || anyHeadlineContainsString("reference") + || anyHeadlineContainsString("blank") + ) + && containsStringIgnoreCase("batch") + ) + then + Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, 1, $section), + entityCreationService.lineAfterStrings(List.of("Batch Identification", + "Batch number:", + "Batch reference number:", + "Batch:", + "Batch/Lot number:", + "Batch (Lot) Number:", + "Batch Number:", + "Batch Nº:", + "Batch no:" + ), "batch_number", EntityType.ENTITY, $section)).flatMap(a -> a) + .forEach(entity -> { + entity.apply("DOC.9.1", "Batch number found", "n-a"); + }); + end + +rule "DOC.9.2: Batch number" + when + $section: Section( + ( + anyHeadlineContainsStringIgnoreCase("Test Substance") + || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") + || anyHeadlineContainsStringIgnoreCase("Test Item") + ) + && !( + anyHeadlineContainsString("component") + || 
anyHeadlineContainsString("reference") + || anyHeadlineContainsString("blank") + ) + && containsStringIgnoreCase("batch") + ) + $batchNumber: String() from List.of("Batch Identification", + "Batch number:", + "Batch reference number:", + "Batch:", + "Batch/Lot number:", + "Batch (Lot) Number:", + "Batch Number:", + "Batch Nº:", + "Batch no:") + $table: Table(containsStringIgnoreCase($batchNumber)) from $section.streamAllSubNodesOfType(NodeType.TABLE).toList() + then + entityCreationService.lineAfterStringAcrossColumnsIgnoreCase($batchNumber, "batch_number", EntityType.ENTITY, $table).forEach(entity -> { + entity.apply("DOC.9.2", "Batch number found", "n-a"); + }); + end + + +// Rule unit: DOC.10 +rule "DOC.10.0: Conclusions - LD50, LC50, Confidence" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) + $section: Section( + (getHeadline().containsStringIgnoreCase("Conclusion") || anyHeadlineContainsStringIgnoreCase("Lethality")) + && (containsString("LD") || containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose")) + && ( + containsString("greater than") + || containsString("higher than") + || containsString("above") + || containsString("in excess") + || containsString("exceeds") + || containsString("was found to be") + || containsString("was calculated to be") + || containsString("estimated to be") + ) + ) + then + entityCreationService.byRegexIgnoreCase("(L[D|C]\\s?50|lethal concentration|lethal dose).{1,200}(greater than|considered to be above|in excess of|exceeds|higher than)", "ld50_greater", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.apply("DOC.10.0", "LD50 greater than found", "n-a"); + }); + entityCreationService.byRegexIgnoreCase("\\b(?:(?:greater|higher) than|considered to be above|(?:was|is) (?:found|estimated) to be|was calculated to be|in excess of|exceeds|equal to)\\s?([\\d\\.]{1,6})\\s?mg\\/(?:kg|L)", 
"ld50_value", EntityType.ENTITY,1, $section)
+            .forEach(entity -> entity.apply("DOC.10.0", "LD50 value found", "n-a"));
+        // The same confidence-interval pattern is evaluated twice: group 1 is the lower bound, group 2 the upper bound.
+        entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_minimal", EntityType.ENTITY,1, $section)
+            .forEach(entity -> entity.apply("DOC.10.0", "Minimal Confidence found", "n-a"));
+        entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_maximal", EntityType.ENTITY,2, $section)
+            .forEach(entity -> entity.apply("DOC.10.0", "Maximal Confidence found", "n-a"));
+    end
+
+
+// Rule unit: DOC.11
+// DOC.11.0: for the listed OECD numbers, extracts the guideline / study-plan deviation text from
+// "General Information" sections, using several start/stop delimiter pairs.
+rule "DOC.11.0: Guideline Deviation"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
+        $section: Section(
+            (getHeadline().containsStringIgnoreCase("General Information") || containsString("GENERAL INFORMATION"))
+            && (containsStringIgnoreCase("from the") || containsStringIgnoreCase("to the"))
+        )
+    then
+        // Character classes fixed: "[G|g]" / "[S|s]" also matched a literal '|'; "[Gg]" / "[Ss]" match only the two letter cases.
+        entityCreationService.betweenRegexes("(?:Deviations? from the [Gg]uidelines?)(?: and| or)?( the)?(?: Study Plan)?", "(?:(?:Deviations? from the Study Plan)|(?:Performing laboratory test)|(?:Other)|(?:Retention of [Ss]amples)|(?:Amendments? to Final Protocol))", "guideline_deviation", EntityType.ENTITY, $section)
+            .forEach(entity -> entity.apply("DOC.11.0", "Deviation from Guidelines found", "n-a"));
+        entityCreationService.betweenRegexes("(?:Deviations? (?:from|to)(?: the)?
[Ss]tudy [Pp]lan)", "(?:Regulatory Guidelines)|(?:Other)|(?:Distribution of the report)|(?:Performing laboratory test)|(?:Retention of [Ss]amples)", "guideline_deviation", EntityType.ENTITY, $section)
+            .forEach(entity -> entity.apply("DOC.11.0", "Deviation from Study Plan found", "n-a"));
+        // Above: "[S|s]" / "[P|p]" replaced by "[Ss]" / "[Pp]" ('|' is a literal inside a character class) and the
+        // duplicated "(?:Distribution of the report)" alternative was dropped; the matched language is otherwise unchanged.
+        entityCreationService.betweenStrings("Deviations from the study plan", "Regulatory Guidelines", "guideline_deviation", EntityType.ENTITY, $section)
+            .forEach(entity -> entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"));
+        entityCreationService.byRegexIgnoreCase("(?>Study plan adherence)(.{1,20}deviations.{1,20} to the study plan.{0,50}\\.)\\s", "guideline_deviation", EntityType.ENTITY, 1, $section)
+            .forEach(entity -> entity.apply("DOC.11.0", "Guideline deviation found in text.", "n-a"));
+        entityCreationService.betweenStringsIncludeEnd("Deviations from the study plan", "validity of the study.", "guideline_deviation", EntityType.ENTITY, $section)
+            .forEach(entity -> entity.apply("DOC.11.0", "Deviation from the study plan found", "n-a"));
+    end
+
+// DOC.11.1: "Introduction" sections stating that there were no deviations from the protocol.
+rule "DOC.11.1: Guideline Deviation in text"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
+        $section: Section(
+            getHeadline().containsStringIgnoreCase("Introduction")
+            && containsStringIgnoreCase("deviations from the protocol")
+        )
+    then
+        entityCreationService.byRegex("There were no deviations from the protocol.{1,100}\\.\\s", "guideline_deviation", EntityType.ENTITY, $section)
+            .forEach(entity -> entity.apply("DOC.11.1", "Guideline deviation found in text.", "n-a"));
+    end
+
+
+// Rule unit: DOC.12
+// DOC.12.0: OECD 425 only; paragraphs under a "Clinical Signs" / "Macroscopic Findings" headline.
+rule "DOC.12.0: Clinical Signs"
+    when
+        FileAttribute(label == "OECD Number", value == "425")
+        $headline: Headline(containsAnyStringIgnoreCase("Clinical Signs", "Macroscopic Findings") && !containsString("TABLE") && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC
FINDINGS"))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "clinical_signs", EntityType.ENTITY)
+            .forEach(entity -> entity.apply("DOC.12.0", "Clinical Signs found", "n-a"));
+    end
+
+
+// Rule unit: DOC.13
+// DOC.13.0: OECD 425 only; dosage sentences in "Dosages" / "Study Design" sections.
+rule "DOC.13.0: Dosages"
+    when
+        FileAttribute(label == "OECD Number", value == "425")
+        $section: Section(
+            (anyHeadlineContainsStringIgnoreCase("Dosages") || anyHeadlineContainsStringIgnoreCase("Study Design"))
+            && !getHeadline().containsString("TABLE")
+        )
+    then
+        entityCreationService.betweenStringsIncludeStartAndEnd("The animals were treated", ".", "dosages", EntityType.ENTITY, $section)
+            .forEach(entity -> entity.apply("DOC.13.0", "Dosage found", "n-a"));
+        entityCreationService.betweenStringsIncludeStartAndEnd("Animals were treated", ".", "dosages", EntityType.ENTITY, $section)
+            .forEach(entity -> entity.apply("DOC.13.0", "Dosage found", "n-a"));
+        // Regex fix: "[\\s|\\n]" and "[\\s|\\n|$]" were character classes, in which '|' and '$' are literals, so the
+        // intended "end of input" alternative could never match. The leading delimiter is now "\\.\\s" and the
+        // trailing one "\\.(?:\\s|$)". Capture group 1 (the sentence without its delimiters) is still the entity.
+        entityCreationService.byRegexWithLineBreaks("(?:\\.\\s|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.(?:\\s|$))", "dosages", EntityType.ENTITY,1, $section)
+            .forEach(entity -> entity.apply("DOC.13.0", "Dosage found", "n-a"));
+    end
+
+
+// Rule unit: DOC.14
+// DOC.14.0: OECD 425 only; paragraphs under a "Mortality" headline that is not a table caption.
+rule "DOC.14.0: Mortality"
+    when
+        $headline: Headline(containsString("Mortality") && !containsString("TABLE"))
+        FileAttribute(label == "OECD Number", value == "425")
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY)
+            .forEach(entity -> entity.apply("DOC.14.0", "Mortality found", "n-a"));
+    end
+
+
+// Rule unit: DOC.15
+// DOC.15.0: paragraphs of sections whose own headline contains "Conclusion".
+rule "DOC.15.0: Study Conclusion"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
+        $section: Section(
+            getHeadline().containsStringIgnoreCase("Conclusion")
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY)
+
.forEach(conclusion -> conclusion.apply("DOC.15.0", "Study Conclusion found", "n-a"));
+    end
+
+
+// Rule unit: DOC.16
+// DOC.16.0: OECD 402 only; paragraphs of "Results" sections that mention body weight in any listed spelling.
+rule "DOC.16.0: Weight Behavior Changes"
+    when
+        FileAttribute(label == "OECD Number", value == "402")
+        $section: Section(
+            getHeadline().containsString("Results")
+            && (
+                containsString("body weight")
+                || containsString("body weights")
+                || containsString("bodyweight")
+                || containsString("bodyweights")
+            )
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY)
+            .forEach(weightChange -> weightChange.apply("DOC.16.0", "Weight behavior changes found", "n-a"));
+    end
+
+
+// Rule unit: DOC.17
+// DOC.17.0: OECD 402/403/436; necropsy / macroscopic sections, excluding table, appendix and
+// "3 - MACROSCOPIC FINDINGS" headlines.
+rule "DOC.17.0: Necropsy findings"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436"))
+        $section: Section(
+            (
+                anyHeadlineContainsStringIgnoreCase("Necropsy")
+                || getHeadline().containsStringIgnoreCase("Macroscopic Findings")
+                || getHeadline().containsStringIgnoreCase("Macroscopic examination")
+            )
+            && !getHeadline().containsStringIgnoreCase("Table")
+            && !getHeadline().containsStringIgnoreCase("Appendix")
+            && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC FINDINGS")
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY)
+            .forEach(finding -> finding.apply("DOC.17.0", "Necropsy section found", "n-a"));
+    end
+
+
+// Rule unit: DOC.18
+// DOC.18.0: OECD 403 only; observation sections that are not appendix, table or mortality sections.
+rule "DOC.18.0: Clinical observations"
+    when
+        FileAttribute(label == "OECD Number", value == "403")
+        $section: Section(
+            (
+                anyHeadlineContainsStringIgnoreCase("Clinical Observations")
+                || anyHeadlineContainsStringIgnoreCase("Clinical observations")
+                || anyHeadlineContainsStringIgnoreCase("In-life Observations")
+                || anyHeadlineContainsStringIgnoreCase("Postmortem Observations")
+            )
+            && !anyHeadlineContainsStringIgnoreCase("Appendix")
+            && !anyHeadlineContainsStringIgnoreCase("Table")
+            && !anyHeadlineContainsStringIgnoreCase("Mortality")
+        )
+    then
+
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY)
+            .forEach(observation -> observation.apply("DOC.18.0", "Clinical observations section found", "n-a"));
+    end
+
+
+// Rule unit: DOC.19
+// DOC.19.0: OECD 403 only; paragraphs under a body-weight headline that is neither an appendix nor a table.
+rule "DOC.19.0: Bodyweight changes"
+    when
+        FileAttribute(label == "OECD Number", value == "403")
+        $headline: Headline(containsAnyStringIgnoreCase("Bodyweight", "Bodyweights", "Body Weights", "Body Weight"), !containsAnyStringIgnoreCase("Appendix", "TABLE"))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "bodyweight_changes", EntityType.ENTITY)
+            .forEach(bodyweight -> bodyweight.apply("DOC.19.0", "Bodyweight section found", "n-a"));
+    end
+
+
+// Rule unit: DOC.20
+// DOC.20.0: "study design" sections of the listed OECD types, excluding the preliminary screening test.
+rule "DOC.20.0: Study Design"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487"))
+        $section: Section(
+            anyHeadlineContainsStringIgnoreCase("study design")
+            && !anyHeadlineContainsString("Preliminary screening test")
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY)
+            .forEach(design -> design.apply("DOC.20.0", "Study design section found", "n-a"));
+    end
+
+// DOC.20.1: headlines whose section identifier is a child of a "Study Design" headline.
+// NOTE(review): unlike DOC.20.0 this rule has no OECD Number guard - confirm that is intended.
+rule "DOC.20.1: Study Design"
+    when
+        Headline(containsStringIgnoreCase("Study Design"), $sectionIdentifier: getSectionIdentifier())
+        $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "study_design", EntityType.ENTITY)
+            .forEach(design -> design.apply("DOC.20.1", "Study design section found", "n-a"));
+    end
+
+
+// Rule unit: DOC.21
+// DOC.21.0: "Results" / "Conclusion" headlines (minus the exclusion list) that have no child headline.
+rule "DOC.21.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487"))
+        $parentHeadline: Headline(
+            containsAnyString("Results", "Conclusion"),
+            !containsAnyString("POSITIVE CONTROL", "Positive Control",
"Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"),
+            $sectionIdentifier: getSectionIdentifier()
+        )
+        not Headline(getSectionIdentifier().isChildOf($sectionIdentifier))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($parentHeadline.getParent(), "results_and_conclusion", EntityType.ENTITY)
+            .forEach(result -> result.apply("DOC.21.0", "Results and Conclusion found", "n-a"));
+    end
+
+// DOC.21.1: counterpart of DOC.21.0 for the child headlines of a matching "Results" / "Conclusion" headline.
+rule "DOC.21.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487"))
+        Headline(
+            containsAnyString("Results", "Conclusion"),
+            !containsAnyString("POSITIVE CONTROL", "Positive Control", "Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"),
+            $sectionIdentifier: getSectionIdentifier()
+        )
+        $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "results_and_conclusion", EntityType.ENTITY)
+            .forEach(result -> result.apply("DOC.21.1", "Results and Conclusion found", "n-a"));
+    end
+
+
+// Rule unit: DOC.22
+// DOC.22.0: OECD 404/405; "Results" sections whose own headline mentions neither "Evaluation" nor "study".
+rule "DOC.22.0: Detailing (404 & 405)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405"))
+        $section: Section(
+            anyHeadlineContainsStringIgnoreCase("Results")
+            && !getHeadline().containsStringIgnoreCase("Evaluation")
+            && !getHeadline().containsStringIgnoreCase("study")
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY)
+            .forEach(detail -> detail.apply("DOC.22.0", "Detailing found", "n-a"));
+    end
+
+
+// Rule unit: DOC.23
+// DOC.23.0: OECD 429 only; preliminary screening test (with clinical observations) or pre-experiment sections.
+rule "DOC.23.0: Preliminary Test Results (429)"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $section: Section(
+            ((anyHeadlineContainsString("Preliminary Screening Test") && containsString("Clinical observations"))
+            ||
anyHeadlineContainsString("Pre-Experiment"))
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY)
+            .forEach(preliminaryResult -> preliminaryResult.apply("DOC.23.0", "Preliminary Test Results found", "n-a"));
+    end
+
+
+// Rule unit: DOC.24
+// DOC.24.0: OECD 429 only; paragraphs of the main result sections, selected by exact headline phrases.
+rule "DOC.24.0: Test Results (429)"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY)
+            .forEach(testResult -> testResult.apply("DOC.24.0", "Test Results found", "n-a"));
+    end
+
+// DOC.24.1: child headlines of a "RESULTS AND DISCUSSION" headline.
+// NOTE(review): the title says "(429)" but, unlike DOC.24.0, there is no OECD Number guard - confirm.
+rule "DOC.24.1: Test Results (429)"
+    when
+        Headline(containsStringIgnoreCase("RESULTS AND DISCUSSION"), $sectionIdentifierResultsAndDiscussion: getSectionIdentifier())
+        $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifierResultsAndDiscussion))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "test_results", EntityType.ENTITY)
+            .forEach(testResult -> testResult.apply("DOC.24.1", "Test Results found", "n-a"));
+    end
+
+
+// Rule unit: DOC.25
+// DOC.25.0: OECD 429 only; in sections holding a "species" entity, marks the word "group" (capture group 1)
+// of an "animals per/ ... group" phrase.
+rule "DOC.25.0: Approach used (429)"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $section: Section(
+            hasEntitiesOfType("species")
+            && (containsStringIgnoreCase("animals per") || containsStringIgnoreCase("animals /"))
+        )
+    then
+        entityCreationService.byRegexIgnoreCase("\\banimals (?:per|\\/) .{0,15}(group)\\b", "approach_used", EntityType.ENTITY,1, $section)
+            .forEach(approach -> approach.apply("DOC.25.0", "Study animal approach found.", "n-a"));
+    end
+
+
+// Rule unit: DOC.26
+// DOC.26.0: OECD 405/429; sex of the test animals in "animal" / "test system" sections.
+rule "DOC.26.0: Sex"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429"))
+        $section: Section(
+            (
+
anyHeadlineContainsStringIgnoreCase("animal")
+                || anyHeadlineContainsStringIgnoreCase("test system")
+            )
+            && !getHeadline().containsStringIgnoreCase("selection")
+            && (
+                containsStringIgnoreCase("sex:")
+                || containsStringIgnoreCase("male")
+                || containsStringIgnoreCase("female")
+            )
+        )
+    then
+        // "[Ss]ex:" instead of "[S|s]ex:": '|' is a literal inside a character class, so the old class also matched "|ex:".
+        // Capture group 2 (male/female) becomes the entity.
+        entityCreationService.byRegexIgnoreCase("([Ss]ex:)?[\\w\\s]{0,10}\\b(males?|females?)\\b", "sex", EntityType.ENTITY,2, $section)
+            .forEach(entity -> entity.apply("DOC.26.0", "Test animal sex found", "n-a"));
+    end
+
+
+// Rule unit: DOC.27
+// DOC.27.0: OECD 405 only; number of animals and individual animal numbers in
+// "animal" / "test system" / "reaction" sections.
+rule "DOC.27.0: Animal Number 405"
+    when
+        FileAttribute(label == "OECD Number", value == "405")
+        $section: Section(
+            (
+                anyHeadlineContainsStringIgnoreCase("animal")
+                || anyHeadlineContainsStringIgnoreCase("test system")
+                || anyHeadlineContainsStringIgnoreCase("reaction")
+            )
+            && !getHeadline().containsString("selection")
+            && (
+                containsStringIgnoreCase("number of animals")
+                || containsStringIgnoreCase("no.")
+            )
+        )
+    then
+        entityCreationService.byRegexIgnoreCase("(Number of animals:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section)
+            .forEach(entity -> entity.apply("DOC.27.0", "Number of animals found", "n-a"));
+        entityCreationService.byRegexIgnoreCase("(?:.{1,10} No\\.
)([\\d\\w\\-]{3,8})", "animal_numbers", EntityType.ENTITY,1, $section)
+            .forEach(animalNumber -> animalNumber.apply("DOC.27.0", "Number of animals found", "n-a"));
+    end
+
+
+// Rule unit: DOC.28
+// DOC.28.0: OECD 429 only; animals-per-group counts, tried with three patterns of decreasing strictness.
+rule "DOC.28.0: Animal Number 429"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $section: Section(
+            (
+                anyHeadlineContainsStringIgnoreCase("animal")
+                || anyHeadlineContainsStringIgnoreCase("test system")
+            )
+            && !getHeadline().containsString("selection")
+            && containsStringIgnoreCase("number of animals")
+            && (containsStringIgnoreCase("per") || containsString("/"))
+            && containsStringIgnoreCase("group")
+        )
+    then
+        entityCreationService.byRegexIgnoreCase("(Number of animals per group:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section)
+            .forEach(groupSize -> groupSize.apply("DOC.28.0", "Number of animals in group found", "n-a"));
+        entityCreationService.byRegexIgnoreCase("(Number of animals per group:).{0,60}\\b([\\d]{1,3})\\sper group\\b", "number_of_animals", EntityType.ENTITY,2, $section)
+            .forEach(groupSize -> groupSize.apply("DOC.28.0", "Number of animals in group found", "n-a"));
+        entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1 , $section)
+            .forEach(groupSize -> groupSize.apply("DOC.28.0", "Number of animals in group found", "n-a"));
+    end
+
+rule "DOC.28.1: No.
Of animals - Fallback to appendix tables listing all individual animals for 429"
+    when
+        // One activation per (header keyword, matching table) pair.
+        $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number")
+        $table: Table(containsString($keyword) && getHeadline().containsString("TABLE") && getHeadline().containsString("Individual"))
+        FileAttribute(label == "OECD Number", value == "429")
+    then
+        // Cells for which bySemanticNode yields an empty Optional are skipped.
+        $table.streamTableCellsWithHeader($keyword)
+            .map(cell -> entityCreationService.bySemanticNode(cell, "animal_numbers", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(animalNumber -> animalNumber.apply("DOC.28.1", "Animal number found.", "n-a"));
+    end
+
+
+// Rule unit: DOC.29
+// DOC.29.0: OECD 403/436; the sentence mentioning a "4 hours" / "four hours" exposure (capture group 1).
+rule "DOC.29.0: 4h Exposure"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436"))
+        $section: Section(
+            (containsStringIgnoreCase("4 hours") || containsStringIgnoreCase("four hours"))
+        )
+    then
+        entityCreationService.byRegexIgnoreCase("((?<=\\.\\s\\b).{1,100}(4|four) hours.*?\\.) ", "4h_exposure", EntityType.ENTITY,1, $section)
+            .forEach(exposure -> exposure.apply("DOC.29.0", "4h exposure sentence found", "n-a"));
+    end
+
+
+// Rule unit: DOC.30
+// DOC.30.0: OECD 404 only; "Formulation" sections that mention "dilution".
+rule "DOC.30.0: Dilution of the test substance"
+    when
+        FileAttribute(label == "OECD Number", value == "404")
+        $section: Section(
+            getHeadline().containsString("Formulation")
+            && containsString("dilution")
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY)
+            .forEach(dilution -> dilution.apply("DOC.30.0", "Dilution found.", "n-a"));
+    end
+
+
+// Rule unit: DOC.31
+// DOC.31.0: OECD 429 only; "Positive Control" sections that are neither appendix nor table.
+rule "DOC.31.0: Positive Control"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $section: Section(
+            getHeadline().containsStringIgnoreCase("Positive Control")
+            && !(getHeadline().containsStringIgnoreCase("Appendix") || getHeadline().containsStringIgnoreCase("Table"))
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY)
+
.forEach(positiveControl -> positiveControl.apply("DOC.31.0", "Positive control found.", "n-a"));
+    end
+
+
+// Rule unit: DOC.32
+// DOC.32.0: OECD 402 only; paragraphs under a "Mortality" headline that is not a table caption.
+rule "DOC.32.0: Mortality Statement"
+    when
+        FileAttribute(label == "OECD Number", value == "402")
+        $headline: Headline(containsStringIgnoreCase("Mortality") && !containsString("TABLE"))
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY)
+            .forEach(statement -> statement.apply("DOC.32.0", "Mortality Statement found", "n-a"));
+    end
+
+
+// Rule unit: DOC.33
+// DOC.33.0: OECD 425 only; tables that have both an outcome-type header and a dose-type header.
+// The cells of every outcome column become "dose_mortality", those of every dose column "dose_mortality_dose".
+rule "DOC.33.0: Dose Mortality"
+    when
+        FileAttribute(label == "OECD Number", value == "425")
+        $table: Table(
+            (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("LongTerm Outcome") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality") || hasHeader("Viability/Mortality"))
+            &&
+            (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]"))
+        )
+    then
+        Stream.of($table.streamTableCellsWithHeader("Mortality"),
+                $table.streamTableCellsWithHeader("Comments"),
+                $table.streamTableCellsWithHeader("Long Term Results"),
+                $table.streamTableCellsWithHeader("Long Term Outcome"),
+                $table.streamTableCellsWithHeader("LongTerm Outcome"),
+                $table.streamTableCellsWithHeader("Viability / Mortality"),
+                $table.streamTableCellsWithHeader("Viability/Mortality")
+            ).flatMap(columnCells -> columnCells)
+            .map(cell -> entityCreationService.bySemanticNode(cell, "dose_mortality", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(outcome -> outcome.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"));
+
+        Stream.of($table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"),
+                $table.streamTableCellsWithHeader("Dose [mg/kg body weight]"),
+
$table.streamTableCellsWithHeader("Dose levei (mg/kg)"),
+                $table.streamTableCellsWithHeader("Dose Level (mg/kg)"),
+                $table.streamTableCellsWithHeader("Dose level (mg/kg)"),
+                $table.streamTableCellsWithHeader("Dose (mg/kg)"),
+                $table.streamTableCellsWithHeader("Dosage [mg/kg body weight]")
+            ).flatMap(columnCells -> columnCells)
+            .map(cell -> entityCreationService.bySemanticNode(cell, "dose_mortality_dose", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(dose -> dose.apply("DOC.33.0", "Dose Mortality Data found.", "n-a"));
+    end
+
+
+// Rule unit: DOC.34
+// DOC.34.0: OECD 429 only; "Results" sections whose headline text range is shorter than 20 and that are
+// neither appendix nor table.
+rule "DOC.34.0: Results (Main Study)"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $section: Section(
+            getHeadline().containsString("Results")
+            && getHeadline().getTextRange().length() < 20
+            && !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table"))
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY)
+            .forEach(mainResult -> mainResult.apply("DOC.34.0", "Results for main study found.", "n-a"));
+    end
+
+
+// Rule unit: DOC.35
+// DOC.35.0: OECD 402 only; paragraphs of "study design" sections.
+rule "DOC.35.0: Doses (mg/kg bodyweight)"
+    when
+        FileAttribute(label == "OECD Number", value == "402")
+        $section: Section(
+            anyHeadlineContainsStringIgnoreCase("study design")
+        )
+    then
+        entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY)
+            .forEach(doseInfo -> doseInfo.apply("DOC.35.0", "Doses per bodyweight information found", "n-a"));
+    end
+
+//------------------------------------ Table extraction rules ------------------------------------
+
+// Rule unit: TAB.0
+// TAB.0.0: only while no "OECD Number" file attribute with one of the listed values exists, derives that
+// attribute from guideline text in the document.
+rule "TAB.0.0: Study Type File Attribute"
+    when
+        not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487"))
+        $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):")
+            && containsAnyString("OECD", "EPA", "OPPTS"))
+    then
+        // Both patterns capture the three-digit guideline number (4xx) in group 1; every match is inserted
+        // into working memory as an "OECD Number" FileAttribute.
+        RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1 ,$section.getTextBlock()).stream()
+            .map(numberRange -> FileAttribute.builder().label("OECD Number").value($section.getTextBlock().subSequence(numberRange).toString()).build())
+            .forEach(oecdNumber -> insert(oecdNumber));
+        RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()).stream()
+            .map(numberRange -> FileAttribute.builder().label("OECD Number").value($section.getTextBlock().subSequence(numberRange).toString()).build())
+            .forEach(oecdNumber -> insert(oecdNumber));
+    end
+
+// TAB.0.1: marks the OECD guideline number and year as entities in the same kind of section as TAB.0.0.
+rule "TAB.0.1: Guidelines"
+    when
+        $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS"))
+    then
+        entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
+            .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no.
found"));
+        // The "Method 4xx ... (yyyy)" pattern is used twice: group 1 is the guideline number, group 2 the year.
+        String oecdMethodPattern = "(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)";
+        entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
+            .forEach(year -> year.apply("TAB.0.1", "OECD Guideline year found"));
+        entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section)
+            .forEach(year -> year.apply("TAB.0.1", "OECD Guideline year found"));
+        entityCreationService.byRegex(oecdMethodPattern, "oecd_guideline_number", EntityType.ENTITY, 1, $section)
+            .forEach(number -> number.apply("TAB.0.1", "OECD Guideline number found"));
+        entityCreationService.byRegex(oecdMethodPattern, "oecd_guideline_year", EntityType.ENTITY, 2, $section)
+            .forEach(year -> year.apply("TAB.0.1", "OECD Guideline year found"));
+    end
+
+
+// Rule unit: TAB.1
+// TAB.1.0: OECD 425 only; every non-header cell of every table below the parent of a "Full Table" section.
+rule "TAB.1.0: Full Table extraction (Guideline Deviation)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Full Table"))
+        $table: Table() from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
+        $tableCell: TableCell(!header) from $table.streamTableCells().toList()
+    then
+        entityCreationService.bySemanticNode($tableCell, "full_table_row", EntityType.ENTITY)
+            .ifPresent(cellEntity -> cellEntity.apply("TAB.1.0", "full table extracted"));
+    end
+
+
+// Rule unit: TAB.2
+// TAB.2.0: OECD 425 only; all cells of the rows whose "Animal No." cell contains 120-2 or 120-5.
+rule "TAB.2.0: Individual row extraction (Clinical Signs)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Individual Rows Extraction"))
+        $table: Table(hasHeader("Animal No."), (hasRowWithHeaderAndAnyValue("Animal No.", List.of("120-2", "120-5")))) from $section.streamChildren().toList()
+        TableCell($row: row, containsAnyString("120-2",
"120-5")) from $table.streamTableCellsWithHeader("Animal No.").toList()
+        $tableCell: TableCell($row == row) from $table.streamTableCells().toList()
+    then
+        entityCreationService.bySemanticNode($tableCell, "clinical_signs", EntityType.ENTITY)
+            .ifPresent(rowEntity -> rowEntity.apply("TAB.2.0", "Individual row based on animal number"));
+    end
+
+
+// Rule unit: TAB.3
+// TAB.3.0: OECD 425 only; every cell of the "Sex" column becomes a "dosages" entity.
+// NOTE(review): the title says "Strain", the header is "Sex" and the type is "dosages" - confirm intent.
+rule "TAB.3.0: Individual column extraction (Strain)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Individual Column"))
+        $table: Table(hasHeader("Sex")) from $section.streamChildren().toList()
+    then
+        $table.streamTableCellsWithHeader("Sex")
+            .map(cell -> entityCreationService.bySemanticNode(cell, "dosages", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(columnEntity -> columnEntity.apply("TAB.3.0", "Individual column based on column header"));
+    end
+
+
+// Rule unit: TAB.4
+// TAB.4.0: OECD 425 only; the "Dosage (mg/kg bw)" column of tables below the parent of a "Combined Columns" section.
+rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Combined Columns"))
+        $table: Table(hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
+    then
+        $table.streamTableCellsWithHeader("Dosage (mg/kg bw)")
+            .map(cell -> entityCreationService.bySemanticNode(cell, "dose_mortality_dose", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(doseEntity -> doseEntity.apply("TAB.4.0", "Dose Mortality dose found."));
+    end
+
+// TAB.4.1: same table selection as TAB.4.0, for the "Mortality" column.
+rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Combined Columns"))
+        $table: Table(hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
+    then
+
$table.streamTableCellsWithHeader("Mortality")
+            .map(cell -> entityCreationService.bySemanticNode(cell, "dose_mortality", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(mortalityEntity -> mortalityEntity.apply("TAB.4.1", "Dose Mortality found."));
+    end
+
+
+// Rule unit: TAB.5
+// TAB.5.0: OECD 425 only; the "Dosage" cell of each row whose "Sex" cell contains the word "Male" and whose
+// "Mortality" cell contains "Survived".
+// NOTE(review): entity type "doses_mg_kg_bw" differs from DOC.35.0's "doses_(mg_kg_bw)" - confirm which is intended.
+rule "TAB.5.0: Targeted cell extraction"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Value Extraction"))
+        $table: Table(hasHeader("Mortality"), hasRowWithHeaderAndAnyValue("Sex", List.of("male", "Male")), hasRowWithHeaderAndValue("Mortality", "Survived")) from $section.streamChildren().toList()
+        TableCell(containsWordIgnoreCase("Male"), $row: row) from $table.streamTableCellsWithHeader("Sex").toList()
+        TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Mortality").toList()
+        $dosageCell: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList()
+    then
+        entityCreationService.bySemanticNode($dosageCell,"doses_mg_kg_bw", EntityType.ENTITY)
+            .ifPresent(dosage -> dosage.apply("TAB.5.0", "Dosage found in row with survived male"));
+    end
+
+
+// Rule unit: TAB.6
+// TAB.6.0: all three cell patterns read the "Group 2" column and are joined on the same row.
+// NOTE(review): no OECD Number guard here, unlike the other TAB rules - confirm.
+rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)"
+    when
+        $section: Section(getHeadline().containsString("Advanced Table Extraction"), containsAllStrings("female", "Female", "Survived", "Group 2"))
+        $table: Table(hasHeader("Group 2")) from $section.streamChildren().toList()
+        TableCell(containsWordIgnoreCase("Female"), $row: row) from $table.streamTableCellsWithHeader("Group 2").toList()
+        TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList()
+        $femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList()
+    then
+        entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY)
+
.ifPresent(survivor -> survivor.apply("TAB.6.0", "Female in group to experimental start date"));
+    end
+
+
+// Rule unit: TAB.7
+// TAB.7.0: OECD 425 only; the cell at the intersection of the "Title" header column and any row that
+// holds a "vertebrates" entity.
+rule "TAB.7.0: Indicator (Species)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Entity-Based"))
+        $table: Table() from $section.streamAllSubNodesOfType(NodeType.TABLE).toList()
+        TableCell(isHeader(), containsString("Title"), $col: col) from $table.streamTableCells().toList()
+        TableCell(hasEntitiesOfType("vertebrates"), $row: row) from $table.streamTableCells().toList()
+        $cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList()
+    then
+        entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY)
+            .ifPresent(studyEntity -> studyEntity.apply("TAB.7.0", "Vertebrate study found"));
+    end
+
+//------------------------------------ Manual redaction rules ------------------------------------
+
+// Rule unit: MAN.0
+// MAN.0.0: applies a manual resize to the matching text entity. The "not" pattern blocks the rule while
+// another resize for the same annotation has an earlier request date. The resize fact is retracted once
+// applied, and the entity and its intersecting nodes are updated.
+rule "MAN.0.0: Apply manual resize redaction"
+    salience 128
+    when
+        $resizeRedaction: ManualResizeRedaction($id: annotationId, $requestDate: requestDate)
+        not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate))
+        $entityToBeResized: TextEntity(matchesAnnotationId($id))
+    then
+        manualChangesApplicationService.resize($entityToBeResized, $resizeRedaction);
+        retract($resizeRedaction);
+        update($entityToBeResized);
+        $entityToBeResized.getIntersectingNodes().forEach(intersectingNode -> update(intersectingNode));
+    end
+
+// MAN.0.1: image counterpart of MAN.0.0; updates the image and its parent node.
+rule "MAN.0.1: Apply manual resize redaction"
+    salience 128
+    when
+        $resizeRedaction: ManualResizeRedaction($id: annotationId, $requestDate: requestDate)
+        not ManualResizeRedaction(annotationId == $id, requestDate.isBefore($requestDate))
+        $imageToBeResized: Image(id == $id)
+    then
+        manualChangesApplicationService.resizeImage($imageToBeResized, $resizeRedaction);
+        retract($resizeRedaction);
+        update($imageToBeResized);
+        update($imageToBeResized.getParent());
+    end
+
+
+// Rule unit:
MAN.1
+// MAN.1.0: records an id removal on the matching text entity, unless the removal targets the dictionary or
+// all dossiers; the removal fact is retracted once applied.
+rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity"
+    salience 128
+    when
+        $idRemoval: IdRemoval($id: annotationId, !removeFromDictionary, !removeFromAllDossiers)
+        $entityToBeRemoved: TextEntity(matchesAnnotationId($id))
+    then
+        $entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
+        update($entityToBeRemoved);
+        retract($idRemoval);
+        $entityToBeRemoved.getIntersectingNodes().forEach(intersectingNode -> update(intersectingNode));
+    end
+
+// MAN.1.1: image counterpart of MAN.1.0.
+// NOTE(review): no removeFromDictionary / removeFromAllDossiers filter here - confirm that is intended.
+rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
+    salience 128
+    when
+        $idRemoval: IdRemoval($id: annotationId)
+        $imageEntityToBeRemoved: Image($id == id)
+    then
+        $imageEntityToBeRemoved.getManualOverwrite().addChange($idRemoval);
+        update($imageEntityToBeRemoved);
+        retract($idRemoval);
+        update($imageEntityToBeRemoved.getParent());
+    end
+
+
+// Rule unit: MAN.2
+// MAN.2.0: records a manual force redaction on the matching text entity and updates it and its
+// intersecting nodes before retracting the force fact.
+rule "MAN.2.0: Apply force redaction"
+    salience 128
+    when
+        $force: ManualForceRedaction($id: annotationId)
+        $entityToForce: TextEntity(matchesAnnotationId($id))
+    then
+        $entityToForce.getManualOverwrite().addChange($force);
+        update($entityToForce);
+        $entityToForce.getIntersectingNodes().forEach(intersectingNode -> update(intersectingNode));
+        retract($force);
+    end
+
+// MAN.2.1: image counterpart of MAN.2.0; updates the image and its parent node.
+rule "MAN.2.1: Apply force redaction to images"
+    salience 128
+    when
+        $force: ManualForceRedaction($id: annotationId)
+        $imageToForce: Image(id == $id)
+    then
+        $imageToForce.getManualOverwrite().addChange($force);
+        update($imageToForce);
+        update($imageToForce.getParent());
+        retract($force);
+    end
+
+
+// Rule unit: MAN.3
+// MAN.3.0: recategorizes a text entity whose current type differs from the requested one. The "not"
+// pattern blocks the rule while another recategorization for the same annotation has an earlier request date.
+rule "MAN.3.0: Apply entity recategorization"
+    salience 128
+    when
+        $recategorization: ManualRecategorization($id: annotationId, $type: type, $requestDate: requestDate)
+        not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate))
+        $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type() != $type)
+    then
+        $entityToBeRecategorized.getIntersectingNodes().forEach(intersectingNode -> update(intersectingNode));
+
$entityToBeRecategorized.getManualOverwrite().addChange($recategorization); + update($entityToBeRecategorized); + retract($recategorization); + end +/* MAN.3.1: same request selection as MAN.3.0, but for a TextEntity whose type() already equals the requested type. Only records the change on the manual overwrite and retracts the request; no update call is made. */ +rule "MAN.3.1: Apply entity recategorization of same type" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $type: type, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type() == $type) + then + $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); + retract($recategorization); + end +/* MAN.3.2: for the recategorization request with no earlier sibling, recategorizes the Image whose id equals the annotation id through manualChangesApplicationService.recategorize, updates the image and its parent, then retracts the request. */ +rule "MAN.3.2: Apply image recategorization" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $imageToBeRecategorized: Image($id == id) + then + manualChangesApplicationService.recategorize($imageToBeRecategorized, $recategorization); + update($imageToBeRecategorized); + update($imageToBeRecategorized.getParent()); + retract($recategorization); + end +/* MAN.3.3: applies, under reference MAN.3.3 and with the legalBasis() of the entity itself, every TextEntity whose manual overwrite reports getRecategorized() as true (absent counts as false) and whose type() is not a hint according to dictionary.isHint. */ +rule "MAN.3.3: Apply recategorization entities by default" + salience 128 + when + $entity: TextEntity(getManualOverwrite().getRecategorized().orElse(false), !dictionary.isHint(type())) + then + $entity.apply("MAN.3.3", "Recategorized entities are applied by default.", $entity.legalBasis()); + end +/* MAN.4.0: adds a ManualLegalBasisChange to the manual overwrite of the Image whose id equals the annotation id, updates the image and retracts the change. NOTE(review): the update and retract statements carry no trailing semicolon, unlike the other MAN rules; confirm the DRL compiler accepts this. The binding name $imageToBeRecategorized appears to be carried over from MAN.3.2. */ +// Rule unit: MAN.4 +rule "MAN.4.0: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId) + $imageToBeRecategorized: Image($id == id) + then + $imageToBeRecategorized.getManualOverwrite().addChange($legalBasisChange); + update($imageToBeRecategorized) + retract($legalBasisChange) + end +/* MAN.4.1: TextEntity variant of MAN.4.0. The target is the TextEntity matching the annotation id; the change is recorded, the entity updated and the change retracted. NOTE(review): its update and retract statements also lack trailing semicolons. */ +rule "MAN.4.1: Apply legal basis change" + salience 128 + when + $legalBasisChange: ManualLegalBasisChange($id: annotationId) + $entityToBeChanged: TextEntity(matchesAnnotationId($id)) + then +
$entityToBeChanged.getManualOverwrite().addChange($legalBasisChange); + update($entityToBeChanged) + retract($legalBasisChange) + end + + +//------------------------------------ Entity merging rules ------------------------------------ +/* X.0.0: removes (reference X.0.0) and retracts a TextEntity without manual changes that is containedBy a different, active TextEntity with the same type() and the same entityType. */ +// Rule unit: X.0 +rule "X.0.0: Remove Entity contained by Entity of same type" + salience 65 + when + $larger: TextEntity($type: type(), $entityType: entityType, active()) + $contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges()) + then + $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); + retract($contained); + end + +/* X.2.0: for an ENTITY or HINT TextEntity without manual changes that is containedBy an active FALSE_POSITIVE of the same type(): updates its intersecting nodes, removes it under reference X.2.0 and retracts it. NOTE(review): retract($entity) has no trailing semicolon, unlike the sibling X rules. */ +// Rule unit: X.2 +rule "X.2.0: Remove Entity of type ENTITY when contained by FALSE_POSITIVE" + salience 64 + when + $falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active()) + $entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges()) + then + $entity.getIntersectingNodes().forEach(node -> update(node)); + $entity.remove("X.2.0", "remove Entity of type ENTITY when contained by FALSE_POSITIVE"); + retract($entity) + end + +/* X.3.0: removes (reference X.3.0) and retracts a RECOMMENDATION without manual changes that is containedBy an active FALSE_RECOMMENDATION of the same type(). */ +// Rule unit: X.3 +rule "X.3.0: Remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION" + salience 64 + when + $falseRecommendation: TextEntity($type: type(), entityType == EntityType.FALSE_RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($falseRecommendation), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then + $recommendation.remove("X.3.0", "remove Entity of type RECOMMENDATION when contained by FALSE_RECOMMENDATION"); + retract($recommendation); + end + +/* X.4.0: when a RECOMMENDATION without manual changes has a text range equal to that of an active ENTITY or HINT with the same type(), the engines of the recommendation are added to that entity and the recommendation is removed under reference X.4.0 and retracted. */ +// Rule unit: X.4 +rule "X.4.0: Remove Entity of type RECOMMENDATION when text range equals ENTITY with same type" + salience 256 + when + $entity: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType ==
EntityType.HINT), active()) + $recommendation: TextEntity(getTextRange().equals($entity.getTextRange()), type() == $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then + $entity.addEngines($recommendation.getEngines()); + $recommendation.remove("X.4.0", "remove Entity of type RECOMMENDATION when text range equals ENTITY with same type"); + retract($recommendation); + end + +/* X.5.0: removes (reference X.5.0) and retracts a RECOMMENDATION without manual changes that intersects an active ENTITY or HINT; the types of the two entities are not compared. */ +// Rule unit: X.5 +rule "X.5.0: Remove Entity of type RECOMMENDATION when intersected by ENTITY" + salience 256 + when + $entity: TextEntity((entityType == EntityType.ENTITY || entityType == EntityType.HINT), active()) + $recommendation: TextEntity(intersects($entity), entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then + $recommendation.remove("X.5.0", "remove Entity of type RECOMMENDATION when intersected by ENTITY"); + retract($recommendation); + end + +/* X.5.1: removes (reference X.5.1) and retracts a RECOMMENDATION without manual changes that is containedBy an active RECOMMENDATION whose type() is different. */ +// Rule unit: X.5 +rule "X.5.1: Remove Entity of type RECOMMENDATION when contained by RECOMMENDATION" + salience 256 + when + $entity: TextEntity($type: type(), entityType == EntityType.RECOMMENDATION, active()) + $recommendation: TextEntity(containedBy($entity), type() != $type, entityType == EntityType.RECOMMENDATION, !hasManualChanges()) + then + $recommendation.remove("X.5.1", "remove Entity of type RECOMMENDATION when contained by RECOMMENDATION"); + retract($recommendation); + end + +/* X.7.0: removes (reference X.7.0) and retracts every Image without manual changes whose imageType is not ImageType.OCR. NOTE(review): the rule name says all images, yet OCR images are excluded by the pattern. */ +// Rule unit: X.7 +rule "X.7.0: Remove all images" + salience 512 + when + $image: Image(imageType != ImageType.OCR, !hasManualChanges()) + then + $image.remove("X.7.0", "remove all images"); + retract($image); + end + +/* X.8.0: when a TextEntity whose engines do not contain Engine.IMPORTED has a text range equal to that of an active entity whose engines do contain it, and both share the same type(), the non-imported entity is removed under reference X.8.0, its engines are added to the imported entity, and it is retracted. */ +// Rule unit: X.8 +rule "X.8.0: Remove Entity when text range and type equals to imported Entity" + salience 257 + when + $entity: TextEntity($type: type(), engines contains Engine.IMPORTED, active()) + $other: TextEntity(getTextRange().equals($entity.getTextRange()), this != $entity, type() == $type, engines not contains Engine.IMPORTED) + then + $other.remove("X.8.0", "remove Entity when text
range and type equals to imported Entity"); + $entity.addEngines($other.getEngines()); + retract($other); + end +/* X.8.1: removes (reference X.8.1) and retracts any TextEntity whose engines do not contain Engine.IMPORTED and that intersects an active entity whose engines do contain it; types are not compared. */ +rule "X.8.1: Remove Entity when intersected by imported Entity" + salience 256 + when + $entity: TextEntity(engines contains Engine.IMPORTED, active()) + $other: TextEntity(intersects($entity), this != $entity, engines not contains Engine.IMPORTED) + then + $other.remove("X.8.1", "remove Entity when intersected by imported Entity"); + retract($other); + end +/* X.9.0: when a SIGNATURE image whose engines contain LayoutEngine.ALGORITHM satisfies mostlyContains (second argument 0.8) for a SIGNATURE image whose engines contain LayoutEngine.AI, the AI image is removed under reference X.9.0 and LayoutEngine.AI is added to the algorithm image. No salience is declared and nothing is retracted. NOTE(review): the removal reason spells alogrithm; it is a runtime string and is left unchanged here. */ +// Rule unit: X.9.0 +rule "X.9.0: Merge mostly contained signatures" + when + $aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI) + $signature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.ALGORITHM, mostlyContains($aiSignature, 0.8)) + then + $aiSignature.remove("X.9.0", "removed because already contained by alogrithm signature"); + $signature.addEngine(LayoutEngine.AI); + end +/* X.10.0: removes (reference X.10.0) a SIGNATURE image whose engines contain LayoutEngine.AI when it is not mostlyContainedBy (second argument 0.8) some image whose engines contain LayoutEngine.ALGORITHM. NOTE(review): as written the rule matches once per ALGORITHM image that does not contain the signature, so a signature contained by one ALGORITHM image but not by another still matches, and it never matches when no ALGORITHM image exists; confirm this is the intended condition. */ +// Rule unit: X.10.0 +rule "X.10.0: remove false positives of ai" + when + $anyImage: Image(engines contains LayoutEngine.ALGORITHM) + $aiSignature: Image(imageType == ImageType.SIGNATURE, engines contains LayoutEngine.AI, !mostlyContainedBy($anyImage, 0.8)) + then + $aiSignature.remove("X.10.0", "Removed because false positive"); + end + + + +//------------------------------------ File attributes rules ------------------------------------ +/* FA.1.0: retracts a FileAttribute when a different FileAttribute fact has an equal label and an equal value. */ +// Rule unit: FA.1 +rule "FA.1.0: Remove duplicate FileAttributes" + salience 64 + when + $fileAttribute: FileAttribute($label: label, $value: value) + $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) + then + retract($duplicate); + end + + +//------------------------------------ Local dictionary search rules ------------------------------------ +/* LDS.0.0: in agenda group LOCAL_DICTIONARY_ADDS with salience -999, for each DictionaryModel from dictionary.getDictionaryModels() whose localEntriesWithMatchedRules is not empty: creates RECOMMENDATION entities of the model type from the model local search over the document, and adds to each entity the matched rules recorded for its value, each converted with asSkippedIfApplied(). */ +// Rule unit: LDS.0 +rule "LDS.0.0: Run local dictionary search" + agenda-group "LOCAL_DICTIONARY_ADDS" + salience -999 + when + $dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels() +
then + entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document) + .forEach(entity -> { + Collection matchedRules = $dictionaryModel.getMatchedRulesForLocalDictionaryEntry(entity.getValue()); + matchedRules.forEach(matchedRule -> entity.addMatchedRule(matchedRule.asSkippedIfApplied())); + }); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsUpToDateTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsUpToDateTest.java index b6ead597..cac2af47 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsUpToDateTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/drools/files/management/services/DroolsUpToDateTest.java @@ -5,6 +5,8 @@ import static org.wildfly.common.Assert.assertTrue; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; @@ -25,8 +27,21 @@ public class DroolsUpToDateTest { @SneakyThrows public void assertAllRuleFilesAreUpToDate() { - Path droolsPath = new ClassPathResource("drools").getFile().toPath(); - Files.walk(droolsPath) - .filter(DroolsUpToDateTest::isEntityRuleFile) - .forEach(this::validateFile); + Path testResourcesDroolsPath = new ClassPathResource("drools").getFile().toPath(); + processPath(testResourcesDroolsPath); + + Path mainResourcesDroolsPath = Paths.get("src/main/resources/drools/all_rules_documine.drl"); + processPath(mainResourcesDroolsPath); + } + + + /** Runs {@code validateFile} on every path found by walking {@code path} that {@code isEntityRuleFile} accepts; {@code path} may be a directory or a single file. */ + @SneakyThrows + private void processPath(Path path) { + + /* Files.walk keeps directory handles open until the returned stream is closed, so the stream is scoped to a try-with-resources block. */ + try (Stream<Path> ruleFiles = Files.walk(path)) { + ruleFiles.filter(DroolsUpToDateTest::isEntityRuleFile) + .forEach(this::validateFile); + } }