DM-307: Rules that lead to files in error state because section has no paragraphs #25

Merged
dominique.eiflaender1 merged 1 commit from DM-307-4 into master 2023-06-30 11:51:47 +02:00
2 changed files with 70 additions and 32 deletions

View File

@ -75,4 +75,8 @@ public class Headline implements GenericSemanticNode {
return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build();
}
/**
 * Reports whether the parent node of this headline has at least one
 * sub-node of type {@code NodeType.PARAGRAPH}.
 *
 * <p>The parent returned by {@code getParent()} is dereferenced without a
 * null check.
 *
 * @return {@code true} if the parent's stream of PARAGRAPH sub-nodes yields
 *         at least one element, {@code false} if that stream is empty
 */
public boolean hasParagraphs() {
return getParent()
.streamAllSubNodesOfType(NodeType.PARAGRAPH)
.findAny()
.isPresent();
}
}

View File

@ -684,6 +684,7 @@ rule "13: Clinical Signs"
|| getHeadline().containsString("Macroscopic Findings")
)
&& !getHeadline().containsString("TABLE")
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "clinical_signs", EntityType.ENTITY, $section);
@ -730,7 +731,7 @@ rule "14: Dosages"
rule "15: Mortality"
when
$headline: Headline(containsString("Mortality") && !containsString("TABLE"))
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
FileAttribute(label == "OECD Number", value == "425")
then
@ -748,6 +749,7 @@ rule "17: Study Conclusion"
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
$section: Section(
getHeadline().containsString("Conclusion")
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section);
@ -769,6 +771,7 @@ rule "18: Weight Behavior Changes"
|| containsString("bodyweight")
|| containsString("bodyweights")
)
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section);
@ -789,6 +792,7 @@ rule "19: Necropsy findings"
)
&& !getHeadline().containsStringIgnoreCase("Table")
&& !getHeadline().containsStringIgnoreCase("Appendix")
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "necropsy_findings", EntityType.ENTITY, $section);
@ -811,6 +815,7 @@ rule "22: Clinical observations"
)
&& !anyHeadlineContainsStringIgnoreCase("Appendix")
&& !anyHeadlineContainsStringIgnoreCase("Table")
&& hasParagraphs()
)
then
@ -901,6 +906,7 @@ rule "24: Study Design"
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487"))
$section: Section(
anyHeadlineContainsStringIgnoreCase("study design")
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_design", EntityType.ENTITY, $section);
@ -920,7 +926,9 @@ rule "25: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
&& !getHeadline().containsString("POSITIVE CONTROL") && !getHeadline().containsString("Positive Control")
&& !getHeadline().containsString("Evaluation") && !getHeadline().containsString("Micronucleus") &&
!getHeadline().containsString("TABLE") && !getHeadline().containsString("DISCUSSION") &&
!getHeadline().containsString("CONCLUSIONS") && !getHeadline().containsString("Interpretation") && !getHeadline().containsString("Viability"))
!getHeadline().containsString("CONCLUSIONS") && !getHeadline().containsString("Interpretation") && !getHeadline().containsString("Viability")
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "results_and_conclusion", EntityType.ENTITY, $section);
entity.setRedactionReason("Results and Conclusion found");
@ -935,14 +943,18 @@ rule "26: Detailing (404 & 405)"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405"))
$section: Section(
getHeadline().containsStringIgnoreCase("Results") && !getHeadline().containsStringIgnoreCase("Evaluation") && !getHeadline().containsStringIgnoreCase("study") && hasParagraphs()
getHeadline().containsStringIgnoreCase("Results") && !getHeadline().containsStringIgnoreCase("Evaluation") && !getHeadline().containsStringIgnoreCase("study") && !getHeadline().containsStringIgnoreCase("discussion") && hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "detailing", EntityType.ENTITY, $section);
entity.setRedactionReason("Detailing found");
entity.setLegalBasis("n-a");
entity.setRedaction(true);
entity.addMatchedRule("26");
var paragraphs = $section.streamAllSubNodesOfType(NodeType.PARAGRAPH).toList();
for(var p : paragraphs){
entityCreationService.bySemanticNode(p, "detailing", EntityType.ENTITY).ifPresent(entity -> {
entity.setRedactionReason("Detailing found");
entity.setLegalBasis("n-a");
entity.setRedaction(true);
entity.addMatchedRule("26");
});
}
end
@ -953,6 +965,7 @@ rule "32: Preliminary Test Results (429)"
(
(getHeadline().containsString("Preliminary Screening Test") && containsString("Clinical observations"))
|| getHeadline().containsString("Pre-Experiment"))
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "preliminary_test_results", EntityType.ENTITY, $section);
@ -966,7 +979,7 @@ rule "32: Preliminary Test Results (429)"
rule "33: Test Results (429)"
when
FileAttribute(label == "OECD Number", value == "429")
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")))
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")) && hasParagraphs())
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "test_results", EntityType.ENTITY, $section);
entity.setRedactionReason("Test Results found");
@ -1002,8 +1015,8 @@ rule "35: Sex"
FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429"))
$section: Section(
(
getHeadline().containsStringIgnoreCase("animal")
|| getHeadline().containsStringIgnoreCase("test system")
anyHeadlineContainsStringIgnoreCase("animal")
|| anyHeadlineContainsStringIgnoreCase("test system")
)
&& !getHeadline().containsStringIgnoreCase("selection")
&& (
@ -1029,9 +1042,9 @@ rule "35a: Animal Number 405"
FileAttribute(label == "OECD Number", value == "405")
$section: Section(
(
getHeadline().containsString("animal")
|| getHeadline().containsString("test system")
|| getHeadline().containsString("reaction")
anyHeadlineContainsStringIgnoreCase("animal")
|| anyHeadlineContainsStringIgnoreCase("test system")
|| anyHeadlineContainsStringIgnoreCase("reaction")
)
&& !getHeadline().containsString("selection")
&& (
@ -1063,8 +1076,8 @@ rule "35b: Animal Number 429"
FileAttribute(label == "OECD Number", value == "429")
$section: Section(
(
getHeadline().containsString("animal")
|| getHeadline().containsString("test system")
getHeadline().containsStringIgnoreCase("animal")
|| getHeadline().containsStringIgnoreCase("test system")
)
&& !getHeadline().containsString("selection")
&& containsStringIgnoreCase("number of animals")
@ -1085,7 +1098,7 @@ rule "35b: Animal Number 429"
entity.setRedaction(true);
entity.addMatchedRule("35");
});
entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> {
entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1 , $section).forEach(entity -> {
entity.setRedactionReason("Number of animals in group found");
entity.setLegalBasis("n-a");
entity.setRedaction(true);
@ -1147,6 +1160,7 @@ rule "39: Dilution of the test substance"
$section: Section(
getHeadline().containsString("Formulation")
&& containsString("dilution")
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "dilution", EntityType.ENTITY, $section);
@ -1161,11 +1175,12 @@ rule "40: Positive Control"
when
FileAttribute(label == "OECD Number", value == "429")
$section: Section(
getHeadline().containsString("Positive Control")
getHeadline().containsStringIgnoreCase("Positive Control")
&& !(
getHeadline().containsString("Appendix")
|| getHeadline().containsString("Table")
getHeadline().containsStringIgnoreCase("Appendix")
|| getHeadline().containsStringIgnoreCase("Table")
)
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "positive_control", EntityType.ENTITY, $section);
@ -1180,7 +1195,7 @@ rule "40: Positive Control"
rule "42: Mortality Statement"
when
FileAttribute(label == "OECD Number", value == "402")
$headline: Headline(containsString("Mortality") && !containsString("TABLE"))
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
then
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent());
entity.setRedactionReason("Mortality Statement found");
@ -1196,22 +1211,35 @@ rule "43: Dose Mortality"
$table: Table(
(hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality"))
&&
(hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]"))
(hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]"))
)
then
Stream.of($table.streamTableCellsWithHeader("Mortality"),
$table.streamTableCellsWithHeader("Comments"),
$table.streamTableCellsWithHeader("Long Term Results"),
$table.streamTableCellsWithHeader("Long Term Outcome"),
$table.streamTableCellsWithHeader("Viability / Mortality"),
$table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"),
$table.streamTableCellsWithHeader("Viability / Mortality")
).flatMap(a -> a)
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> {
redactionEntity.setRedaction(true);
redactionEntity.addMatchedRule("43");
redactionEntity.setRedactionReason("Dose Mortality Data found.");
redactionEntity.setLegalBasis("n-a");
insert(redactionEntity);
});
Stream.of($table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"),
$table.streamTableCellsWithHeader("Dose [mg/kg body weight]"),
$table.streamTableCellsWithHeader("Dose levei (mg/kg)"),
$table.streamTableCellsWithHeader("Dose Level (mg/kg)"),
$table.streamTableCellsWithHeader("Dose level (mg/kg)"),
$table.streamTableCellsWithHeader("Dose (mg/kg)"),
$table.streamTableCellsWithHeader("Dosage [mg/kg body weight]")
).flatMap(a -> a)
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY))
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> {
@ -1229,18 +1257,23 @@ rule "44: Results (Main Study)"
FileAttribute(label == "OECD Number", value == "429")
$section: Section(
getHeadline().containsString("Results")
&& getHeadline().toString().length() < 20
&& getHeadline().getTextBlock().toString().length() < 20
&& hasParagraphs()
&& !(
getHeadline().containsString("Appendix")
|| getHeadline().containsString("Table")
)
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "results_(main_study)", EntityType.ENTITY, $section);
entity.setRedactionReason("Results for main study found.");
entity.setLegalBasis("n-a");
entity.setRedaction(true);
entity.addMatchedRule("44");
var paragraphs = $section.streamAllSubNodesOfType(NodeType.PARAGRAPH).toList();
for(var p : paragraphs){
entityCreationService.bySemanticNode(p, "results_(main_study)", EntityType.ENTITY).ifPresent(entity -> {
entity.setRedactionReason("Results for main study found.");
entity.setLegalBasis("n-a");
entity.setRedaction(true);
entity.addMatchedRule("44");
});
}
end
@ -1248,7 +1281,8 @@ rule "45: Doses (mg/kg bodyweight)"
when
FileAttribute(label == "OECD Number", value == "402")
$section: Section(
getHeadline().containsString("study design")
anyHeadlineContainsStringIgnoreCase("study design")
&& hasParagraphs()
)
then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "doses_(mg_kg_bw)", EntityType.ENTITY, $section);