From 27c64acbea48b2399e3ab63ed6806c83bf3fadea Mon Sep 17 00:00:00 2001 From: deiflaender Date: Fri, 30 Jun 2023 11:43:37 +0200 Subject: [PATCH] DM-307: Rules that lead to files in error state because section has no paragraphs --- .../document/graph/nodes/Headline.java | 4 + .../test/resources/drools/documine_flora.drl | 98 +++++++++++++------ 2 files changed, 70 insertions(+), 32 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java index 1f7747d4..a1507a77 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Headline.java @@ -75,4 +75,8 @@ public class Headline implements GenericSemanticNode { return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build(); } + public boolean hasParagraphs(){ + return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent(); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 621fcec0..ff014963 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -684,6 +684,7 @@ rule "13: Clinical Signs" || getHeadline().containsString("Macroscopic Findings") ) && !getHeadline().containsString("TABLE") + && hasParagraphs() ) then var 
entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_signs", EntityType.ENTITY, $section); @@ -730,7 +731,7 @@ rule "14: Dosages" rule "15: Mortality" when - $headline: Headline(containsString("Mortality") && !containsString("TABLE")) + $headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs()) FileAttribute(label == "OECD Number", value == "425") then @@ -748,6 +749,7 @@ rule "17: Study Conclusion" FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) $section: Section( getHeadline().containsString("Conclusion") + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section); @@ -769,6 +771,7 @@ rule "18: Weight Behavior Changes" || containsString("bodyweight") || containsString("bodyweights") ) + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section); @@ -789,6 +792,7 @@ rule "19: Necropsy findings" ) && !getHeadline().containsStringIgnoreCase("Table") && !getHeadline().containsStringIgnoreCase("Appendix") + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "necropsy_findings", EntityType.ENTITY, $section); @@ -811,6 +815,7 @@ rule "22: Clinical observations" ) && !anyHeadlineContainsStringIgnoreCase("Appendix") && !anyHeadlineContainsStringIgnoreCase("Table") + && hasParagraphs() ) then @@ -901,6 +906,7 @@ rule "24: Study Design" FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) $section: Section( anyHeadlineContainsStringIgnoreCase("study design") + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_design", EntityType.ENTITY, $section); @@ -920,7 +926,9 @@ rule "25: Results and Conclusion (406, 428, 438, 439, 474 & 487)" && !getHeadline().containsString("POSITIVE CONTROL") && !getHeadline().containsString("Positive Control") && !getHeadline().containsString("Evaluation") && !getHeadline().containsString("Micronucleus") && !getHeadline().containsString("TABLE") && !getHeadline().containsString("DISCUSSION") && - !getHeadline().containsString("CONCLUSIONS") && !getHeadline().containsString("Interpretation") && !getHeadline().containsString("Viability")) + !getHeadline().containsString("CONCLUSIONS") && !getHeadline().containsString("Interpretation") && !getHeadline().containsString("Viability") + && hasParagraphs() + ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "results_and_conclusion", EntityType.ENTITY, $section); entity.setRedactionReason("Results and Conclusion found"); @@ -935,14 +943,18 @@ rule "26: Detailing (404 & 405)" when FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) $section: Section( - getHeadline().containsStringIgnoreCase("Results") && !getHeadline().containsStringIgnoreCase("Evaluation") && !getHeadline().containsStringIgnoreCase("study") && hasParagraphs() + getHeadline().containsStringIgnoreCase("Results") && !getHeadline().containsStringIgnoreCase("Evaluation") && !getHeadline().containsStringIgnoreCase("study") && !getHeadline().containsStringIgnoreCase("discussion") && hasParagraphs() ) then - var entity = 
entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "detailing", EntityType.ENTITY, $section); - entity.setRedactionReason("Detailing found"); - entity.setLegalBasis("n-a"); - entity.setRedaction(true); - entity.addMatchedRule("26"); + var paragraphs = $section.streamAllSubNodesOfType(NodeType.PARAGRAPH).toList(); + for(var p : paragraphs){ + entityCreationService.bySemanticNode(p, "detailing", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Detailing found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("26"); + }); + } end @@ -953,6 +965,7 @@ rule "32: Preliminary Test Results (429)" ( (getHeadline().containsString("Preliminary Screening Test") && containsString("Clinical observations")) || getHeadline().containsString("Pre-Experiment")) + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "preliminary_test_results", EntityType.ENTITY, $section); @@ -966,7 +979,7 @@ rule "32: Preliminary Test Results (429)" rule "33: Test Results (429)" when FileAttribute(label == "OECD Number", value == "429") - $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))) + $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")) && hasParagraphs()) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "test_results", EntityType.ENTITY, 
$section); entity.setRedactionReason("Test Results found"); @@ -1002,8 +1015,8 @@ rule "35: Sex" FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) $section: Section( ( - getHeadline().containsStringIgnoreCase("animal") - || getHeadline().containsStringIgnoreCase("test system") + anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("test system") ) && !getHeadline().containsStringIgnoreCase("selection") && ( @@ -1029,9 +1042,9 @@ rule "35a: Animal Number 405" FileAttribute(label == "OECD Number", value == "405") $section: Section( ( - getHeadline().containsString("animal") - || getHeadline().containsString("test system") - || getHeadline().containsString("reaction") + anyHeadlineContainsStringIgnoreCase("animal") + || anyHeadlineContainsStringIgnoreCase("test system") + || anyHeadlineContainsStringIgnoreCase("reaction") ) && !getHeadline().containsString("selection") && ( @@ -1063,8 +1076,8 @@ rule "35b: Animal Number 429" FileAttribute(label == "OECD Number", value == "429") $section: Section( ( - getHeadline().containsString("animal") - || getHeadline().containsString("test system") + getHeadline().containsStringIgnoreCase("animal") + || getHeadline().containsStringIgnoreCase("test system") ) && !getHeadline().containsString("selection") && containsStringIgnoreCase("number of animals") @@ -1085,7 +1098,7 @@ rule "35b: Animal Number 429" entity.setRedaction(true); entity.addMatchedRule("35"); }); - entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1 , $section).forEach(entity -> { entity.setRedactionReason("Number of animals in group found"); entity.setLegalBasis("n-a"); entity.setRedaction(true); @@ -1147,6 +1160,7 @@ rule "39: Dilution of the test substance" 
$section: Section( getHeadline().containsString("Formulation") && containsString("dilution") + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "dilution", EntityType.ENTITY, $section); @@ -1161,11 +1175,12 @@ rule "40: Positive Control" when FileAttribute(label == "OECD Number", value == "429") $section: Section( - getHeadline().containsString("Positive Control") + getHeadline().containsStringIgnoreCase("Positive Control") && !( - getHeadline().containsString("Appendix") - || getHeadline().containsString("Table") + getHeadline().containsStringIgnoreCase("Appendix") + || getHeadline().containsStringIgnoreCase("Table") ) + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "positive_control", EntityType.ENTITY, $section); @@ -1180,7 +1195,7 @@ rule "40: Positive Control" rule "42: Mortality Statement" when FileAttribute(label == "OECD Number", value == "402") - $headline: Headline(containsString("Mortality") && !containsString("TABLE")) + $headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs()) then var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent()); entity.setRedactionReason("Mortality Statement found"); @@ -1196,22 +1211,35 @@ rule "43: Dose Mortality" $table: Table( (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality")) && - (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level 
(mg/kg)") || hasHeader("Dosage [mg/kg body weight]")) + (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]")) ) then Stream.of($table.streamTableCellsWithHeader("Mortality"), $table.streamTableCellsWithHeader("Comments"), $table.streamTableCellsWithHeader("Long Term Results"), $table.streamTableCellsWithHeader("Long Term Outcome"), - $table.streamTableCellsWithHeader("Viability / Mortality"), - $table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"), + $table.streamTableCellsWithHeader("Viability / Mortality") + ).flatMap(a -> a) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.setRedaction(true); + redactionEntity.addMatchedRule("43"); + redactionEntity.setRedactionReason("Dose Mortality Data found."); + redactionEntity.setLegalBasis("n-a"); + insert(redactionEntity); + }); + + Stream.of($table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"), + $table.streamTableCellsWithHeader("Dose [mg/kg body weight]"), $table.streamTableCellsWithHeader("Dose levei (mg/kg)"), $table.streamTableCellsWithHeader("Dose Level (mg/kg)"), $table.streamTableCellsWithHeader("Dose level (mg/kg)"), $table.streamTableCellsWithHeader("Dose (mg/kg)"), $table.streamTableCellsWithHeader("Dosage [mg/kg body weight]") ).flatMap(a -> a) - .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) .forEach(redactionEntity -> { @@ -1229,18 +1257,23 @@ rule "44: Results (Main Study)" FileAttribute(label == "OECD Number", value == 
"429") $section: Section( getHeadline().containsString("Results") - && getHeadline().toString().length() < 20 + && getHeadline().getTextBlock().toString().length() < 20 + && hasParagraphs() && !( getHeadline().containsString("Appendix") || getHeadline().containsString("Table") ) ) then - var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "results_(main_study)", EntityType.ENTITY, $section); - entity.setRedactionReason("Results for main study found."); - entity.setLegalBasis("n-a"); - entity.setRedaction(true); - entity.addMatchedRule("44"); + var paragraphs = $section.streamAllSubNodesOfType(NodeType.PARAGRAPH).toList(); + for(var p : paragraphs){ + entityCreationService.bySemanticNode(p, "results_(main_study)", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Results for main study found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("44"); + }); + } end @@ -1248,7 +1281,8 @@ rule "45: Doses (mg/kg bodyweight)" when FileAttribute(label == "OECD Number", value == "402") $section: Section( - getHeadline().containsString("study design") + anyHeadlineContainsStringIgnoreCase("study design") + && hasParagraphs() ) then var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "doses_(mg_kg_bw)", EntityType.ENTITY, $section); -- 2.47.2