From f16378bded1fdb08021255adde14db1e36586ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Mon, 4 Sep 2023 17:26:27 +0200 Subject: [PATCH] RED-7317: fix behavior of recategorize --- .../service/DroolsExecutionService.java | 1 + .../service/ManualChangeFactory.java | 6 + .../test/resources/drools/documine_flora.drl | 1213 ++++------------- 3 files changed, 277 insertions(+), 943 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java index d17761ee..473d788f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/DroolsExecutionService.java @@ -93,6 +93,7 @@ public class DroolsExecutionService { manualRedactions.getEntriesToAdd().forEach(kieSession::insert); manualRedactions.getForceRedactions().forEach(kieSession::insert); manualRedactions.getIdsToRemove().forEach(kieSession::insert); + manualRedactions.getLegalBasisChanges().forEach(kieSession::insert); } kieSession.insert(nerEntities); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java index ba0b3950..5a244e9a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/ManualChangeFactory.java @@ -8,6 +8,7 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; @@ -37,6 +38,11 @@ public class ManualChangeFactory { } else if (baseAnnotation instanceof ManualRedactionEntry manualRedactionEntry) { manualChange.withManualRedactionType(manualRedactionEntry.isAddToDictionary() ? 
ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY) .withChange("value", manualRedactionEntry.getValue()); + } else if (baseAnnotation instanceof ManualLegalBasisChange manualLegalBasisChange) { + manualChange.withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE) + .withChange("section", manualLegalBasisChange.getSection()) + .withChange("value", manualLegalBasisChange.getValue()) + .withChange("legalBasis", manualLegalBasisChange.getLegalBasis()); } manualChange.setProcessedDate(OffsetDateTime.now()); return manualChange; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 4f79d270..16a5dab3 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -1,9 +1,3 @@ -package drools - -import static java.lang.String.format; -import static com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility.anyMatch; -import static com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility.exactMatch; - import java.util.List; import java.util.LinkedList; import java.util.Set; @@ -33,6 +27,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; import com.iqser.red.service.redaction.v1.server.document.services.ManualChangesApplicationService; import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; @@ -49,7 +44,7 @@ import com.iqser.red.service.redaction.v1.server.document.graph.entity.TextEntit global Document document global EntityCreationService entityCreationService -global ManualChangesApplicationService manualRedactionApplicationService +global ManualChangesApplicationService manualChangesApplicationService global Dictionary dictionary //------------------------------------ queries ------------------------------------ @@ -60,6 +55,8 @@ query "getFileAttributes" //--------------------------------------------------------------------------- + + rule "H.0.0 retract table of contents page" when $page: Page(getMainBodyTextBlock().getSearchText().contains("........") || (getMainBodyTextBlock().getSearchText().contains("APPENDICES") && getMainBodyTextBlock().getSearchText().contains("TABLES"))) @@ -82,28 +79,31 @@ rule "H.0.0: Ignore Table of Contents" .forEach(node -> retract(node)) ); end + -// Rule unit: MAN.0 +/* rule "H.0.0: Show headlines" when $headline: Headline() then entityCreationService.bySemanticNode($headline, "headline", EntityType.RECOMMENDATION); end - +*/ rule "H.0.0: Study Type File Attribute" when - not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","408","414","425","429","436","438","439","471","487")) $section: Section( - (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):")) + onPage(1) ,(containsString("OECD") || containsString("EPA") || containsString("OPPTS")) ) then 
Stream.of(RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $section.getTextBlock()), RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()), - RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock())).flatMap(Collection::stream).findFirst() + RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock()), + RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD) Guideline, Method No. (\\d{3})", 1, $section.getTextBlock()) + ).flatMap(Collection::stream).findFirst() .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) .ifPresent(fileAttribute -> insert(fileAttribute)); @@ -113,11 +113,7 @@ rule "H.0.0: Study Type File Attribute" rule "DOC.1.0: Guidelines" when $section: Section( - ( - containsString("DATA REQUIREMENT") - || containsString("TEST GUIDELINE") - || containsString("MÉTODO(S) DE REFERÊNCIA(S):") - ) + onPage(1) && ( containsString("OECD") || containsString("EPA") @@ -176,6 +172,19 @@ rule "DOC.1.0: Guidelines" end +rule "DOC.1.1: Guidelines" + when + $headline: Headline( + onPage(1), + containsString("OECD") + ) + then + entityCreationService.byRegex("(OECD (No\\.? 
)?(\\d{3})( \\(\\d{4}\\))?)", "oecd_guideline", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.1", "OECD Guideline found", "n-a") + ); + end + + rule "DOC.1.2: Guidelines" when $section: Section( @@ -231,84 +240,56 @@ rule "DOC.1.3: Guidelines" end -rule "DOC.2.0: Report number" - when - $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) - then - entityCreationService.lineAfterString("Report Number:", "report_number", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { - entity.apply("DOC.2.0", "Report number found", "n-a"); - }); - end -rule "DOC.3.0: Experimental Starting Date" - when - $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II. Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) - then - entityCreationService.lineAfterStrings( - List.of("Experimental start date", - "Experimental start date:", - "Experimental Starting Date", - "Experimental Starting Date:", - "Experimental starting date", - "Experimental starting date:", - "Experimental Start Date", - "Experimental Start Date:", - "Experimental I. Starting Date:", - "Experimental II. 
Starting Date:"), "experimental_start_date", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.3.0", "Experimental start date found", "n-a"); - }); - end -rule "DOC.3.1: Experimental Completion Date" - when - $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) - then - entityCreationService.lineAfterStrings( - List.of("Experimental termination date", - "Experimental termination date:", - "Experimental Completion Date", - "Experimental Completion Date:", - "Experimental completion date", - "Experimental completion date:", - "Experimental Termination Date", - "Experimental Termination Date:"), "experimental_end_date", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.3.1", "Experimental end date found", "n-a"); - }); - end + +rule "DOC.3.2: Experimental Completion Date" + salience 10 + when + $section: Section(onPage(1) && (containsString("STUDY COMPLETED ON") || containsString("STUDY COMPLETION DATE") || containsString("Report completion date") || containsString("Date of Report") || containsString("AMENDMENT COMPLETION DATE") || containsString("AMENDMENT COMPLETED ON"))) + + then + entityCreationService.byRegex("STUDY COMPLETED ON (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.apply("DOC.3.2", "Experimental end date found", "n-a"); + }); + entityCreationService.byRegex("STUDY COMPLETION DATE (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.apply("DOC.3.2", "Experimental end date found", "n-a"); + }); + entityCreationService.byRegex("Report completion date (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.apply("DOC.3.2", "Experimental end date found", "n-a"); + }); + entityCreationService.byRegex("Date of Report (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, 
$section).forEach(entity -> { + entity.apply("DOC.3.2", "Experimental end date found", "n-a"); + }); + entityCreationService.byRegex("AMENDMENT COMPLETION DATE (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.apply("DOC.3.2", "Experimental end date found", "n-a"); + }); + entityCreationService.byRegex("AMENDMENT COMPLETED ON (.{5,20}\\d{4})", "experimental_end_date", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.apply("DOC.3.2", "Experimental end date found", "n-a"); + }); + end + - // ignore species and strain in irrelevant study types - rule "DOC.4.1: Species" - salience 1 - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) - $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) - then - $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { - entity.removeFromGraph(); - retract(entity); - }); - end // hide all skipped species and strains except in the relevant sections rule "DOC.4.2: Species" salience 1 when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) $section: Section( (hasEntitiesOfType("species") || hasEntitiesOfType("strain")) && !( anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("animals") + || anyHeadlineContainsStringIgnoreCase("Species and strain") || anyHeadlineContainsStringIgnoreCase("specification") ) ) then $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { - entity.removeFromGraph(); + entity.remove("DOC.4.2","n-a"); retract(entity); }); end @@ -316,7 +297,6 @@ rule "DOC.3.1: Experimental Completion Date" rule "DOC.4.3: Species" when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) $section: Section(hasEntitiesOfType("species")) then $section.getEntitiesOfType("species").forEach(entity -> { @@ -328,13 
+308,12 @@ rule "DOC.4.3: Species" rule "DOC.5.0: Strain" when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) $section: Section( hasEntitiesOfType("species") && hasEntitiesOfType("strain") && ( anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("animals") + || anyHeadlineContainsStringIgnoreCase("Species and strain") || anyHeadlineContainsStringIgnoreCase("specification") ) ) @@ -345,686 +324,15 @@ rule "DOC.5.0: Strain" end -rule "DOC.7.0: study title by document structure" - when - $table: Table(onPage(1), - (containsString("Final Report") || containsString("SPL")), - numberOfRows == 1, - numberOfCols == 1) - then - - entityCreationService.bySemanticNode($table.getCell(0, 0).streamChildren().toList().get(1), "title", EntityType.ENTITY).ifPresent(entity -> { - entity.apply("DOC.7.0", "Study title found", "n-a"); - }); - end - - -rule "DOC.7.1: study title" - when - $table: Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) - then - entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $table).findFirst().ifPresent(entity -> { - entity.apply("DOC.7.1", "Title found", "n-a"); - }); - end - - -rule "DOC.7.2: study title" - when - not Table(onPage(1), (containsString("Final Report") || containsString("SPL"))) - $section: Section(onPage(1), (containsString("Final Report") || containsString("SPL"))) - then - entityCreationService.byRegexWithLineBreaksIgnoreCase("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { - entity.apply("DOC.7.2", "Title found", "n-a"); - }); - end - - - -rule "DOC.8.1: Performing Laboratory (Name)" - when - $section: Section(containsString("PERFORMING LABORATORY:")) - then - entityCreationService.lineAfterString("PERFORMING LABORATORY:", "laboratory_name", EntityType.ENTITY, 
$section).findFirst().ifPresent(entity -> { - entity.apply("DOC.8.1", "Performing Laboratory found", "n-a"); - }); - end - - - rule "DOC.8.2: Performing Laboratory (Country)" - when - nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) - $section: Section(containsString("PERFORMING LABORATORY:")) - then - nerEntities.streamEntitiesOfType("COUNTRY") - .filter(nerEntity -> $section.getTextRange().contains(nerEntity.textRange())) - .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) - .forEach(entity -> { - entity.apply("DOC.8.2", "Performing Laboratory found", "n-a"); - insert(entity); - }); - end - - -rule "DOC.8.3: Performing Laboratory (Country & Name) from dict" - when - $section: Section( - (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")) - && (containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) - ) - then - $section.getEntitiesOfType("laboratory_country").forEach(entity -> { - entity.apply("DOC.8.3", "Performing laboratory country dictionary entry found.", "n-a"); - }); - $section.getEntitiesOfType("laboratory_name").forEach(entity -> { - entity.apply("DOC.8.3", "Performing laboratory name dictionary entry found.", "n-a"); - }); - end - - -rule "DOC.8.4: Performing Laboratory (Country) from dict" - when - $section: Section( - (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")) - && !(containsString("PERFORMING LABORATORY:") || (containsString("PERFORMING") && containsString("LABORATORY:"))) - ) - then - $section.getEntitiesOfType(List.of("laboratory_country", "laboratory_name")).forEach(entity -> { - entity.removeFromGraph(); - retract(entity); - }); - end - - -rule "DOC.9.0: GLP Study" - when - $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") - || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") - || (containsString("DECLARACAO DE 
CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) - || containsString("GLP Certificate") - || containsString("GLP Certificates") - || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") - || containsString("Good Laboratory Practice Certificate") - || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION")) - then - entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { - entity.apply("DOC.9.0", "GLP Study found", "n-a"); - }); - end - - -rule "DOC.10.0: Batch number from CoA" - when - $section: Section( - ( - anyHeadlineContainsString("Analytical Report") - || anyHeadlineContainsStringIgnoreCase("Certificate of Analysis") - || containsStringIgnoreCase("Certificate of Analysis") - ) - && ( - containsStringIgnoreCase("batch") - || containsStringIgnoreCase("bath") - || containsStringIgnoreCase("barch") - || containsStringIgnoreCase("bateb") - ) - && ( - containsStringIgnoreCase("identification") - || containsStringIgnoreCase("ldentitfication") - || containsStringIgnoreCase("wentification") - || containsStringIgnoreCase("mentification") - || containsStringIgnoreCase("kientification") - || containsStringIgnoreCase("reference number") - || containsStringIgnoreCase("test substance") - ) - ) - then - entityCreationService.lineAfterStrings(List.of("Batch Identification", - "(Batch Identification):", - "Bateb Identification", - "Batch Wentification", - "Batch Mentification", - "Batch Kientification", - "Barch Identification", - "Bath ldentitfication", - "Batch of test substance :"), "batch_number", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.10.0", "Batch number found in CoA", "n-a"); - }); - end - - -rule "DOC.10.1: Batch number" - when - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Test Substance") - || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") - || anyHeadlineContainsStringIgnoreCase("Test Item") - ) - && !( - 
anyHeadlineContainsString("component") - || anyHeadlineContainsString("reference") - || anyHeadlineContainsString("blank") - ) - && containsStringIgnoreCase("batch") - ) - then - Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, 1, $section), - entityCreationService.lineAfterStrings(List.of("Batch Identification", - "Batch number:", - "Batch reference number:", - "Batch:", - "Batch/Lot number:", - "Batch (Lot) Number:", - "Batch Number:", - "Batch Nº:", - "Batch no:" - ), "batch_number", EntityType.ENTITY, $section)).flatMap(a -> a) - .forEach(entity -> { - entity.apply("DOC.10.1", "Batch number found", "n-a"); - }); - end - - -rule "DOC.10.2: Batch number" - when - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Test Substance") - || anyHeadlineContainsStringIgnoreCase("Test and Control Substances") - || anyHeadlineContainsStringIgnoreCase("Test Item") - ) - && !( - anyHeadlineContainsString("component") - || anyHeadlineContainsString("reference") - || anyHeadlineContainsString("blank") - ) - && containsStringIgnoreCase("batch") - ) - $batchNumber: String() from List.of("Batch Identification", - "Batch number:", - "Batch reference number:", - "Batch:", - "Batch/Lot number:", - "Batch (Lot) Number:", - "Batch Number:", - "Batch Nº:", - "Batch no:") - $table: Table(containsStringIgnoreCase($batchNumber)) from $section.streamAllSubNodesOfType(NodeType.TABLE).toList() - then - entityCreationService.lineAfterStringAcrossColumnsIgnoreCase($batchNumber, "batch_number", EntityType.ENTITY, $table).forEach(entity -> { - entity.apply("DOC.10.2", "Batch number found", "n-a"); - }); - end - - - - -rule "DOC.11.0: Conclusions - LD50, LC50, Confidence" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) - $section: Section( - (getHeadline().containsStringIgnoreCase("Conclusion") || anyHeadlineContainsStringIgnoreCase("Lethality")) - && (containsString("LD") || 
containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose")) - && ( - containsString("greater than") - || containsString("higher than") - || containsString("above") - || containsString("in excess") - || containsString("exceeds") - || containsString("was found to be") - || containsString("was calculated to be") - || containsString("estimated to be") - ) - ) - then - entityCreationService.byRegexIgnoreCase("(L[D|C]\\s?50|lethal concentration|lethal dose).{1,200}(greater than|considered to be above|in excess of|exceeds|higher than)", "ld50_greater", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.11.0", "LD50 greater than found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("\\b(?:(?:greater|higher) than|considered to be above|(?:was|is) (?:found|estimated) to be|was calculated to be|in excess of|exceeds|equal to)\\s?([\\d\\.]{1,6})\\s?mg\\/(?:kg|L)", "ld50_value", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.11.0", "LD50 value found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_minimal", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.11.0", "Minimal Confidence found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_maximal", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.11.0", "Maximal Confidence found", "n-a"); - }); - end - - -rule "DOC.12.0: Guideline Deviation" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) - $section: Section( - (getHeadline().containsStringIgnoreCase("General Information") || containsString("GENERAL INFORMATION")) - && (containsStringIgnoreCase("from the") || 
containsStringIgnoreCase("to the")) - ) - then - entityCreationService.betweenRegexes("(?:Deviations? from the [G|g]uidelines?)(?: and| or)?( the)?(?: Study Plan)?", "(?:(?:Deviations? from the Study Plan)|(?:Performing laboratory test)|(?:Other)|(?:Retention of [S|s]amples)|(?:Amendments? to Final Protocol))", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.12.0", "Deviation from Guidelines found", "n-a"); - }); - entityCreationService.betweenRegexes("(?:Deviations? (?:from|to)(?: the)? [S|s]tudy [P|p]lan)", "(?:Regulatory Guidelines)|(?:Other)|(?:Distribution of the report)|(?:Performing laboratory test)|(?:Distribution of the report)|(?:Retention of [S|s]amples)", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.12.0", "Deviation from Study Plan found", "n-a"); - }); - entityCreationService.betweenStrings("Deviations from the study plan", "Regulatory Guidelines", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.12.0", "Deviation from the study plan found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("(?>Study plan adherence)(.{1,20}deviations.{1,20} to the study plan.{0,50}\\.)\\s", "guideline_deviation", EntityType.ENTITY, 1, $section).forEach(entity -> { - entity.apply("DOC.12.0", "Guideline deviation found in text.", "n-a"); - }); - entityCreationService.betweenStringsIncludeEnd("Deviations from the study plan", "validity of the study.", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.12.0", "Deviation from the study plan found", "n-a"); - }); - end - - -rule "DOC.12.1: Guideline Deviation in text" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) - $section: Section( - getHeadline().containsStringIgnoreCase("Introduction") - && containsStringIgnoreCase("deviations from the protocol") - ) - then - 
entityCreationService.byRegex("There were no deviations from the protocol.{1,100}\\.\\s", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.12.1", "Guideline deviation found in text.", "n-a"); - }); - end - - -rule "DOC.13.0: Clinical Signs" - when - FileAttribute(label == "OECD Number", value == "425") - $headline: Headline(containsAnyStringIgnoreCase("Clinical Signs", "Macroscopic Findings") && !containsString("TABLE") && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC FINDINGS")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "clinical_signs", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a")); - end - -/* -rule "DOC.13.0: Clinical Signs" - when - FileAttribute(label == "OECD Number", value == "425") - $headline: Headline(containsAnyStringIgnoreCase("Clinical Signs", "Macroscopic Findings") && !containsString("TABLE")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "clinical_signs", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a")); - end -*/ - -rule "DOC.14.0: Dosages" - when - FileAttribute(label == "OECD Number", value == "425") - $section: Section( - (anyHeadlineContainsStringIgnoreCase("Dosages") || anyHeadlineContainsStringIgnoreCase("Study Design")) - && !getHeadline().containsString("TABLE") - ) - then - entityCreationService.betweenStringsIncludeStartAndEnd("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.14.0", "Dosage found", "n-a"); - }); - entityCreationService.betweenStringsIncludeStartAndEnd("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.14.0", "Dosage found", "n-a"); - }); - 
entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.14.0", "Dosage found", "n-a"); - }); - end - - -rule "DOC.15.0: Mortality" - when - $headline: Headline(containsString("Mortality") && !containsString("TABLE")) - FileAttribute(label == "OECD Number", value == "425") - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a")); - end - - -rule "DOC.17.0: Study Conclusion" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) - $section: Section( - getHeadline().containsStringIgnoreCase("Conclusion") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a")); - end - /* - -rule "DOC.18.0: Weight Behavior Changes" - when - FileAttribute(label == "OECD Number", value == "402") - $section: Section( - getHeadline().containsStringIgnoreCase("Results") - && ( - containsString("body weight") - || containsString("body weights") - || containsString("bodyweight") - || containsString("bodyweights") - ) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a")); - end -*/ - -rule "DOC.18.0: Weight Behavior Changes" - when - FileAttribute(label == "OECD Number", value == "402") - $section: Section( - getHeadline().containsString("Results") - && ( - containsString("body weight") - || containsString("body weights") - || containsString("bodyweight") - || containsString("bodyweights") - ) - ) - then - 
entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a")); - end - -rule "DOC.19.0: Necropsy findings" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Necropsy") - || getHeadline().containsStringIgnoreCase("Macroscopic Findings") - || getHeadline().containsStringIgnoreCase("Macroscopic examination") - ) - && !getHeadline().containsStringIgnoreCase("Table") - && !getHeadline().containsStringIgnoreCase("Appendix") - && !getHeadline().containsStringIgnoreCase("3 - MACROSCOPIC FINDINGS") - //&& !containsString("3 - MACROSCOPIC FINDINGS") - //&& !anyHeadlineContainsString("3 - MACROSCOPIC FINDINGS") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) - .forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a")); - end - -/* -rule "DOC.19.0: Necropsy findings" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Necropsy") - || getHeadline().containsStringIgnoreCase("Macroscopic Findings") - || getHeadline().containsStringIgnoreCase("Macroscopic examination") - ) - && !getHeadline().containsStringIgnoreCase("Table") - && !getHeadline().containsStringIgnoreCase("Appendix") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) - .forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a")); - end -*/ - -rule "DOC.22.0: Clinical observations" - when - FileAttribute(label == "OECD Number", value == "403") - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Clinical Observations") - || anyHeadlineContainsStringIgnoreCase("Clinical observations") - || 
anyHeadlineContainsStringIgnoreCase("In-life Observations") - || anyHeadlineContainsStringIgnoreCase("Postmortem Observations") - ) - && !anyHeadlineContainsStringIgnoreCase("Appendix") - && !anyHeadlineContainsStringIgnoreCase("Table") - && !anyHeadlineContainsStringIgnoreCase("Mortality") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a")); - end - -/* -rule "DOC.19.0: Necropsy findings" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) - $section: Section( - ( - getHeadline().containsString("Necropsy") - || getHeadline().containsString("Macroscopic Findings") - || getHeadline().containsString("Macroscopic examination") - ) - && !getHeadline().containsStringIgnoreCase("Table") - && !getHeadline().containsStringIgnoreCase("Appendix") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) - .forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a")); - end - - -rule "DOC.22.0: Clinical observations" - when - FileAttribute(label == "OECD Number", value == "403") - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("Clinical Observations") - || anyHeadlineContainsStringIgnoreCase("Clinical observations") - || anyHeadlineContainsStringIgnoreCase("In-life Observations") - || anyHeadlineContainsStringIgnoreCase("Postmortem Observations") - ) - && !anyHeadlineContainsStringIgnoreCase("Appendix") - && !anyHeadlineContainsStringIgnoreCase("Table") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a")); - end -*/ - -/* Die beiden waren vorher auch auskommentiert -rule "DOC.23.1: Bodyweight changes" - when - FileAttribute(label == "OECD Number", 
value == "403") - $section: Section( - containsString("Bodyweight") - && containsString("Group") - ) - then - entityCreationService.betweenRegexes("\\.\\s\\bBodyweight\\s", "Group.{0,40}[\\s\\d(.]{1,10}mg/L\\)", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.23.1", "Bodyweight changes found in results", "n-a"); - }); - entityCreationService.betweenRegexes("\\.\\s\\bBodyweight.{100,500}Group.{0,30}mg/L\\)", "\\..{1,20}Group", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.23.1", "Bodyweight changes found in results", "n-a"); - }); - end - - -rule "DOC.23.2: Bodyweight changes" - when - FileAttribute(label == "OECD Number", value == "403") - $section: Section( - getHeadline().containsString("Bodyweight") - && containsString("Sighting") - && containsString("Main") - ) - then - entityCreationService.betweenRegexes("\\bSighting[\\w\\s]{0,15}[\\s\\d(.]{1,10}mg/L\\)", "\\bMain[\\w\\s]{0,15}[\\s\\(\\d\\.]{1,10}mg/L\\)", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.23.2", "Bodyweight section found", "n-a"); - }); - entityCreationService.betweenRegexes("\\bMain[\\w\\s]{0,15}[\\s\\d(.]{1,10}mg/L\\)", "the study\\.", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.23.2", "Bodyweight section found", "n-a"); - }); - end -*/ - - -rule "DOC.23.0: Bodyweight changes" - when - FileAttribute(label == "OECD Number", value == "403") - $headline: Headline(containsAnyStringIgnoreCase("Bodyweight", "Bodyweights", "Body Weights", "Body Weight"), !containsAnyStringIgnoreCase("Appendix", "TABLE")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "bodyweight_changes", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a")); - end - - -rule "DOC.24.0: Study Design" - when - FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) - $section: Section( - //retry this with only getHeadline().containsStringIgnoreCase("study design") - anyHeadlineContainsStringIgnoreCase("study design") - && !anyHeadlineContainsString("Preliminary screening test") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a")); - end - - -rule "DOC.24.1: Study Design" - when - Headline(containsStringIgnoreCase("Study Design"), $sectionIdentifier: getSectionIdentifier()) - $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "study_design", EntityType.ENTITY) - .forEach(entity -> { - entity.apply("DOC.24.1", "Study design section found", "n-a"); - }); - end - -/* -rule "DOC.24.0: Study Design" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) - $section: Section( - anyHeadlineContainsStringIgnoreCase("study design") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a")); - end -*/ - -rule "DOC.25.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) - $parentHeadline: Headline( - containsAnyString("Results", "Conclusion"), - !containsAnyString("POSITIVE CONTROL", "Positive Control", "Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), - $sectionIdentifier: getSectionIdentifier() - ) - not Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) - then - 
entityCreationService.bySemanticNodeParagraphsOnly($parentHeadline.getParent(), "results_and_conclusion", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a")); - end - - -rule "DOC.25.1: Results and Conclusion (406, 428, 438, 439, 474 & 487)" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) - Headline( - containsAnyString("Results", "Conclusion"), - !containsAnyString("POSITIVE CONTROL", "Positive Control", "Evaluation", "Micronucleus", "TABLE", "DISCUSSION", "CONCLUSIONS", "Interpretation","Viability", "analysis"), - $sectionIdentifier: getSectionIdentifier() - ) - $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "results_and_conclusion", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.25.1", "Results and Conclusion found", "n-a")); - end - - -// TBD: This rule now finds both Results and RESULTS AND DISCUSSION. This ensures that we do not have empty Components in some of the files. In RESULTS AND DISCUSSION we should find every Subsection, not just the first. 
-rule "DOC.26.0: Detailing (404 & 405)" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) - $section: Section( - anyHeadlineContainsStringIgnoreCase("Results") - && !getHeadline().containsStringIgnoreCase("Evaluation") - && !getHeadline().containsStringIgnoreCase("study") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.26.0", "Detailing found", "n-a")); - end - - -/* -rule "DOC.26.0: Detailing (404 & 405)" - when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) - $section: Section( - getHeadline().containsStringIgnoreCase("Results") - && !getHeadline().containsStringIgnoreCase("Evaluation") - && !getHeadline().containsStringIgnoreCase("study") - && !getHeadline().containsStringIgnoreCase("discussion") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.26.0", "Detailing found", "n-a")); - end -*/ - -rule "DOC.32.0: Preliminary Test Results (429)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - ((anyHeadlineContainsString("Preliminary Screening Test") && containsString("Clinical observations")) - || anyHeadlineContainsString("Pre-Experiment")) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a")); - end - - -rule "DOC.33.0: Test Results (429)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", 
EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a")); - end - -rule "DOC.33.1: Test Results (429)" - when - Headline(containsStringIgnoreCase("RESULTS AND DISCUSSION"), $sectionIdentifierResultsAndDiscussion: getSectionIdentifier()) - $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifierResultsAndDiscussion)) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "test_results", EntityType.ENTITY) - .forEach(entity -> { - entity.apply("DOC.33.1", "Test Results found", "n-a"); - }); - end - - -rule "DOC.34.0: Approach used (429)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - hasEntitiesOfType("species") - && (containsStringIgnoreCase("animals per") || containsStringIgnoreCase("animals /")) - ) - then - entityCreationService.byRegexIgnoreCase("\\banimals (?:per|\\/) .{0,15}(group)\\b", "approach_used", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.34.0", "Study animal approach found.", "n-a"); - }); - end rule "DOC.35.0: Sex" when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) + $section: Section( ( anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("Species and strain") || anyHeadlineContainsStringIgnoreCase("test system") ) && !getHeadline().containsStringIgnoreCase("selection") @@ -1042,218 +350,225 @@ rule "DOC.35.0: Sex" -rule "DOC.35.1: Animal Number 405" +rule "DOC.6.0: Authors" when - FileAttribute(label == "OECD Number", value == "405") - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("animal") - || anyHeadlineContainsStringIgnoreCase("test system") - || anyHeadlineContainsStringIgnoreCase("reaction") - ) - && !getHeadline().containsString("selection") - && ( - containsStringIgnoreCase("number of animals") - || containsStringIgnoreCase("no.") - ) - ) + $headline: Headline(onPage(1), containsString("AUTHOR")) then - 
entityCreationService.byRegexIgnoreCase("(Number of animals:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.35.1", "Number of animals found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("(?:.{1,10} No\\. )([\\d\\w\\-]{3,8})", "animal_numbers", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.35.1", "Number of animals found", "n-a"); - }); + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "author", EntityType.ENTITY) + .forEach(entity -> entity.apply("DOC.6.0", "Author found", "n-a")); end -rule "DOC.35.2: Animal Number 429" +rule "DOC.6.2: Authors" when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - ( - anyHeadlineContainsStringIgnoreCase("animal") - || anyHeadlineContainsStringIgnoreCase("test system") - ) - && !getHeadline().containsString("selection") - && containsStringIgnoreCase("number of animals") - && (containsStringIgnoreCase("per") || containsString("/")) - && containsStringIgnoreCase("group") - ) + $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "AUTHOR(S)" || contains "AUTHORS" || contains "Author"), getMainBodyTextBlock().getSearchText() (contains "STUDY COMPLETED ON" || contains "STUDY COMPLETION DATE" || contains "DATE OF INTERIM REPORT" || contains "Report completion date" || contains "Date of Report" || contains "AMENDMENT COMPLETION DATE")) then - entityCreationService.byRegexIgnoreCase("(Number of animals per group:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.35.2", "Number of animals in group found", "n-a"); - }); - entityCreationService.byRegexIgnoreCase("(Number of animals per group:).{0,60}\\b([\\d]{1,3})\\sper group\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { - entity.apply("DOC.35.2", "Number of animals in group found", "n-a"); 
- }); - entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1 , $section).forEach(entity -> { - entity.apply("DOC.35.2", "Number of animals in group found", "n-a"); - }); - end + List startBoundaries = new LinkedList<>(); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("AUTHOR(S)", $page.getMainBodyTextBlock())); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("AUTHORS", $page.getMainBodyTextBlock())); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("Author", $page.getMainBodyTextBlock())); -rule "DOC.35.3: No. Of animals - Fallback to appendix tables listing all individual animals for 429" - when - $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number") - $table: Table(containsString($keyword) && getHeadline().containsString("TABLE") && getHeadline().containsString("Individual")) - FileAttribute(label == "OECD Number", value == "429") - then - $table.streamTableCellsWithHeader($keyword) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "animal_numbers", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(entity -> { - entity.apply("DOC.35.3", "Animal number found.", "n-a"); - insert(entity); - }); + List stopBoundaries = new LinkedList<>(); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("STUDY COMPLETED ON", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("STUDY COMPLETION DATE", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("DATE OF INTERIM REPORT", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("Report completion date", $page.getMainBodyTextBlock())); + 
stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("Date of Report", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("AMENDMENT COMPLETION DATE", $page.getMainBodyTextBlock())); + + entityCreationService.betweenBoundaries(startBoundaries, stopBoundaries, "author", EntityType.ENTITY, document).forEach(entity -> { + entity.apply("DOC.6.2", "Author found", "n-a"); + }); end -rule "DOC.37.0: 4h Exposure" + + +rule "DOC.6.6: laboratory_project_identification" when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436")) - $section: Section( - (containsStringIgnoreCase("4 hours") || containsStringIgnoreCase("four hours")) - ) + $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "LABORATORY PROJECT IDENTIFICATION" || contains "TEST FACILITY PROJECT IDENTIFICATION" || contains "Laboratory Project Identification")) then - /* entityCreationService.byRegexIgnoreCase("(?<=\\.\\s\\b).{1,100}(4|four) hours.{1,250}(?=\\b\\.|\\B\\.)", "4h_exposure", EntityType.ENTITY, $section).forEach(entity -> { - entity.apply("DOC.37.0", "4h exposure sentence found", "n-a"); - }); */ - entityCreationService.byRegexIgnoreCase("((?<=\\.\\s\\b).{1,100}(4|four) hours.*?\\.) 
", "4h_exposure", EntityType.ENTITY,1, $section).forEach(entity -> { - entity.apply("DOC.37.0", "4h exposure sentence found", "n-a"); + List startBoundaries = new LinkedList<>(); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("LABORATORY PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("TEST FACILITY PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); + + List stopBoundaries = new LinkedList<>(); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("SPONSOR", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("VOLUME", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("This", $page.getMainBodyTextBlock())); + + entityCreationService.betweenBoundaries(startBoundaries, stopBoundaries, "laboratory_project_identification", EntityType.ENTITY, document).forEach(entity -> { + entity.apply("DOC.6.6", "Laboratory Project Identification", "n-a"); + }); + end + + + +rule "DOC.7.2: study title by document structure" + when + $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "STUDY TITLE" || contains "Study Title" || contains "STUDYTITLE" || contains "Report (Final)")) + then + + List startBoundaries = new LinkedList<>(); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("STUDY TITLE", $page.getMainBodyTextBlock())); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("STUDYTITLE", $page.getMainBodyTextBlock())); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("Report (Final)", $page.getMainBodyTextBlock())); + + List stopBoundaries = new LinkedList<>(); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("TEST GUIDELINES", $page.getMainBodyTextBlock())); + 
stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("TEST GUIDELINE(S)", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("Guidelines", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("DATA REQUIREMENT", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("AUTHOR(S)", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("AUTHOR", $page.getMainBodyTextBlock())); + + entityCreationService.betweenBoundaries(startBoundaries, stopBoundaries, "title", EntityType.ENTITY, document).forEach(entity -> { + entity.apply("DOC.7.2", "Study title found", "n-a"); + }); + end + + + +rule "DOC.8.2: Performing Laboratory" + when + $page: Page(getNumber() == 1, getMainBodyTextBlock().getSearchText() (contains "PERFORMING LABORATORY" || contains "TEST FACILITIES" || contains "TEST FACILITY" || contains "Test Facility"), getMainBodyTextBlock().getSearchText() (contains "LABORATORY PROJECT IDENTIFICATION" || contains "TEST FACILITY PROJECT IDENTIFICATION" || contains "Sponsor")) + then + List startBoundaries = new LinkedList<>(); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("PERFORMING LABORATORY", $page.getMainBodyTextBlock())); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("TEST FACILITIES", $page.getMainBodyTextBlock())); + startBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("TEST FACILITY", $page.getMainBodyTextBlock())); + + List stopBoundaries = new LinkedList<>(); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("LABORATORY PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("TEST FACILITY PROJECT 
IDENTIFICATION", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("Sponsor", $page.getMainBodyTextBlock())); + stopBoundaries.addAll(RedactionSearchUtility.findBoundariesByStringIgnoreCase("PROJECT IDENTIFICATION", $page.getMainBodyTextBlock())); + + entityCreationService.betweenBoundaries(startBoundaries, stopBoundaries, "laboratory_name", EntityType.ENTITY, document).forEach(entity -> { + entity.apply("DOC.8.2", "Performing Laboratory found", "n-a"); + }); + end + + + + +rule "DOC.98.2: Summary Methods" + when + $headline: Headline(containsString("1.1. METHODS")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_methods", EntityType.ENTITY) + .filter(e -> !e.getValue().contains("Report; Project No")) + .filter(e -> !e.getValue().startsWith("This document")) + .filter(e -> !e.getValue().startsWith("Page")) + .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) + .filter(e -> !e.getValue().startsWith("BASF")) + .filter(e -> !e.getValue().startsWith("The Chemical Company")) + .filter(e -> !e.getValue().startsWith("We create chemistry")) + .forEach(entity -> entity.apply("DOC.8.2", "Summary Methods found", "n-a")); + end + +rule "DOC.98.3: Summary Observations Laboratory" + when + $headline: Headline(containsString("1.2. OBSERVATIONS")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_observations", EntityType.ENTITY) + .filter(e -> !e.getValue().contains("Report; Project No")) + .filter(e -> !e.getValue().startsWith("This document")) + .filter(e -> !e.getValue().startsWith("Page")) + .filter(e -> !e.getValue().startsWith("2. 
INTRODUCTION")) + .filter(e -> !e.getValue().startsWith("BASF")) + .filter(e -> !e.getValue().startsWith("The Chemical Company")) + .filter(e -> !e.getValue().startsWith("We create chemistry")) + .forEach(entity -> entity.apply("DOC.8.3", "Summary Observations found", "n-a")); + end + + +rule "DOC.98.5: Summary Results" + when + Headline((containsStringIgnoreCase("1.3. RESULTS") || containsStringIgnoreCase("1.2. RESULTS")), $sectionIdentifier: getSectionIdentifier()) + $headline: Headline(getSectionIdentifier().isChildOf($sectionIdentifier)) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_results", EntityType.ENTITY) + .filter(e -> !e.getValue().contains("Report; Project No")) + .filter(e -> !e.getValue().startsWith("This document")) + .filter(e -> !e.getValue().startsWith("Page")) + .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) + .filter(e -> !e.getValue().startsWith("BASF")) + .filter(e -> !e.getValue().startsWith("The Chemical Company")) + .filter(e -> !e.getValue().startsWith("We create chemistry")) + .forEach(entity -> { + entity.apply("DOC.8.5", "Summary Results", "n-a"); + }); + end + +rule "DOC.98.6: Summary Results 2" + when + $headline: Headline(containsString("1.2. RESULTS")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "summary_results", EntityType.ENTITY) + .filter(e -> !e.getValue().contains("Report; Project No")) + .filter(e -> !e.getValue().startsWith("This document")) + .filter(e -> !e.getValue().startsWith("Page")) + .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) + .filter(e -> !e.getValue().startsWith("BASF")) + .filter(e -> !e.getValue().startsWith("The Chemical Company")) + .filter(e -> !e.getValue().startsWith("We create chemistry")) + .forEach(entity -> entity.apply("DOC.8.5", "Summary Results", "n-a")); + end + + + +rule "DOC.98.4: Summary Conclusion" + when + $headline: Headline(containsString("1.4. CONCLUSION") || containsString("1.3. 
CONCLUSION")) + then + entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "study_conclusion", EntityType.ENTITY) + .filter(e -> !e.getValue().contains("Report; Project No")) + .filter(e -> !e.getValue().startsWith("This document")) + .filter(e -> !e.getValue().startsWith("Page")) + .filter(e -> !e.getValue().startsWith("2. INTRODUCTION")) + .filter(e -> !e.getValue().startsWith("BASF")) + .filter(e -> !e.getValue().startsWith("The Chemical Company")) + .filter(e -> !e.getValue().startsWith("We create chemistry")) + .forEach(entity -> entity.apply("DOC.8.3", "Summary Conculsion found", "n-a")); + end + + + +rule "DOC.9.0: GLP Study" + when + $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") + || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") + || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) + || containsString("GLP Certificate") + || containsString("GLP Certificates") + || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") + || containsString("Good Laboratory Practice Certificate") + || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION") + || containsString("GLP CERTIFICATE (FROM THE COMPETENT AUTHORITY)") + || containsString("GLP COMPLIANCE STATEMENT") + || containsString("GLP STATEMENT") + ) + then + entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { + entity.apply("DOC.9.0", "GLP Study found", "n-a"); }); end -rule "DOC.39.0: Dilution of the test substance" + +rule "DOC.9.1: GLP Study" when - FileAttribute(label == "OECD Number", value == "404") - $section: Section( - getHeadline().containsString("Formulation") - && containsString("dilution") - ) + $paragraph: Paragraph(containsString("GLP COMPLIANCE STATEMENT")) then - entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", 
"n-a")); + entityCreationService.byRegex("GLP COMPLIANCE STATEMENT", "glp_study", EntityType.ENTITY, $paragraph).forEach(entity -> { + entity.apply("DOC.9.1", "GLP Study found", "n-a"); + }); end -rule "DOC.40.0: Positive Control" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - getHeadline().containsStringIgnoreCase("Positive Control") - && !(getHeadline().containsStringIgnoreCase("Appendix") || getHeadline().containsStringIgnoreCase("Table")) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a")); - end - - -rule "DOC.42.0: Mortality Statement" - when - FileAttribute(label == "OECD Number", value == "402") - $headline: Headline(containsStringIgnoreCase("Mortality") && !containsString("TABLE")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a")); - end - -/* -rule "DOC.42.0: Mortality Statement" - when - FileAttribute(label == "OECD Number", value == "402") - $headline: Headline(containsString("Mortality") && !containsString("TABLE")) - then - entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a")); - end -*/ - -rule "DOC.43.0: Dose Mortality" - when - FileAttribute(label == "OECD Number", value == "425") - $table: Table( - (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("LongTerm Outcome") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality") || hasHeader("Viability/Mortality")) - && - (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || 
hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]")) - ) - then - Stream.of($table.streamTableCellsWithHeader("Mortality"), - $table.streamTableCellsWithHeader("Comments"), - $table.streamTableCellsWithHeader("Long Term Results"), - $table.streamTableCellsWithHeader("Long Term Outcome"), - $table.streamTableCellsWithHeader("LongTerm Outcome"), - $table.streamTableCellsWithHeader("Viability / Mortality"), - $table.streamTableCellsWithHeader("Viability/Mortality") - ).flatMap(a -> a) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(entity -> { - entity.apply("DOC.43.0", "Dose Mortality Data found.", "n-a"); - insert(entity); - }); - - Stream.of($table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"), - $table.streamTableCellsWithHeader("Dose [mg/kg body weight]"), - $table.streamTableCellsWithHeader("Dose levei (mg/kg)"), - $table.streamTableCellsWithHeader("Dose Level (mg/kg)"), - $table.streamTableCellsWithHeader("Dose level (mg/kg)"), - $table.streamTableCellsWithHeader("Dose (mg/kg)"), - $table.streamTableCellsWithHeader("Dosage [mg/kg body weight]") - ).flatMap(a -> a) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY)) - .filter(Optional::isPresent) - .map(Optional::get) - .forEach(entity -> { - entity.apply("DOC.43.0", "Dose Mortality Data found.", "n-a"); - insert(entity); - }); - end - - -rule "DOC.44.0: Results (Main Study)" - when - FileAttribute(label == "OECD Number", value == "429") - $section: Section( - getHeadline().containsString("Results") - && getHeadline().getGetTextRange().length() < 20 - && !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table")) - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY) - .forEach(entity 
-> entity.apply("DOC.44.0", "Results for main study found.", "n-a")); - end - - -rule "DOC.45.0: Doses (mg/kg bodyweight)" - when - FileAttribute(label == "OECD Number", value == "402") - $section: Section( - anyHeadlineContainsStringIgnoreCase("study design") - ) - then - entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY) - .forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a")); - end - -// This is just an example for new rules feature. -//rule "DOC.99.0" -// when -// Headline(containsStringIgnoreCase("materials and methods"), $sectionIdentifierMaterials: getSectionIdentifier()) -// Headline(containsStringIgnoreCase("controls"), getSectionIdentifier().isChildOf($sectionIdentifierMaterials), $sectionIdentifierControls: getSectionIdentifier()) -// $headline: Headline(containsStringIgnoreCase("positive control substances"), getSectionIdentifier().isChildOf($sectionIdentifierControls)) -// then -// System.out.println($headline); -// entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY) -// .forEach(entity -> { -// entity.apply("DOC.6.0", "positive control substance found", "n-a"); -// }); -// end //------------------------------------ Manual redaction rules ------------------------------------ @@ -1313,7 +628,6 @@ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to // Rule unit: MAN.2 rule "MAN.2.0: Apply force redaction" - no-loop true salience 128 when $force: ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED) @@ -1322,10 +636,10 @@ rule "MAN.2.0: Apply force redaction" $entityToForce.getManualOverwrite().addChange($force); update($entityToForce); $entityToForce.getIntersectingNodes().forEach(node -> update(node)); + retract($force); end rule "MAN.2.1: Apply force redaction to images" - no-loop true salience 128 when $force: ManualForceRedaction($id: annotationId, status == 
AnnotationStatus.APPROVED) @@ -1334,6 +648,7 @@ rule "MAN.2.1: Apply force redaction to images" $imageToForce.getManualOverwrite().addChange($force); update($imageToForce); update($imageToForce.getParent()); + retract($force); end @@ -1341,9 +656,9 @@ rule "MAN.2.1: Apply force redaction to images" rule "MAN.3.0: Apply entity recategorization" salience 128 when - $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) - $entityToBeRecategorized: TextEntity(matchesAnnotationId($id)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type != $type) then $entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node)); manualChangesApplicationService.recategorize($entityToBeRecategorized, $recategorization); @@ -1352,7 +667,18 @@ rule "MAN.3.0: Apply entity recategorization" retract($entityToBeRecategorized); end -rule "MAN.3.1: Apply image recategorization" +rule "MAN.3.1: Apply entity recategorization of same type" + salience 128 + when + $recategorization: ManualRecategorization($id: annotationId, $type: type, status == AnnotationStatus.APPROVED, $requestDate: requestDate) + not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate)) + $entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type == $type) + then + $entityToBeRecategorized.getManualOverwrite().addChange($recategorization); + retract($recategorization); + end + +rule "MAN.3.2: Apply image recategorization" salience 128 when $recategorization: ManualRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $requestDate: requestDate) @@ -1365,6 +691,7 @@ rule "MAN.3.1: Apply image recategorization" retract($recategorization); end + // Rule 
unit: MAN.4 rule "MAN.4.0: Apply legal basis change" salience 128