DM-307: Changed rules to use applyWithLineBreaks; fixes applyWithLineBreaks (#37)
@ -7,6 +7,7 @@ import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -210,7 +211,7 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
CharSequence subSequence = subSequence(boundary);
|
||||
Set<Integer> lbInBoundary = lineBreaks.stream().filter(boundary::contains).collect(Collectors.toSet());
|
||||
Set<Integer> lbInBoundary = new HashSet<>(lineBreaks);
|
||||
if (boundary.end() == getBoundary().end()) {
|
||||
lbInBoundary.add(getBoundary().length());
|
||||
}
|
||||
|
||||
@ -44,7 +44,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A8591B/15-Curacron_ToxicidadeAgudaOral.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf");
|
||||
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
|
||||
@ -292,6 +292,7 @@ rule "DOC.4.3: Species"
|
||||
then
|
||||
$section.getEntitiesOfType("species").forEach(entity -> {
|
||||
entity.apply("DOC.4.3", "Species found.", "n-a");
|
||||
entity.setValue(entity.getValue().toLowerCase());
|
||||
});
|
||||
end
|
||||
|
||||
@ -313,18 +314,7 @@ rule "DOC.5.0: Strain"
|
||||
entity.apply("DOC.5.0", "Strain found.", "n-a");
|
||||
});
|
||||
end
|
||||
rule "DOC.6.0"
|
||||
when
|
||||
Headline(containsStringIgnoreCase("materials and methods"), $sectionIdentifierMaterials: getSectionIdentifier())
|
||||
Headline(containsStringIgnoreCase("controls"), getSectionIdentifier().isChildOf($sectionIdentifierMaterials), $sectionIdentifierControls: getSectionIdentifier())
|
||||
$headline: Headline(containsStringIgnoreCase("positive control substances"), getSectionIdentifier().isChildOf($sectionIdentifierControls))
|
||||
then
|
||||
System.out.println($headline);
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY)
|
||||
.forEach(entity -> {
|
||||
entity.apply("DOC.6.0", "positive control substance found", "n-a");
|
||||
});
|
||||
end
|
||||
|
||||
|
||||
//rule "DOC.7.0: study title by document structure"
|
||||
// when
|
||||
@ -507,7 +497,7 @@ rule "DOC.11.0: Conclusions - LD50, LC50, Confidence"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436"))
|
||||
$section: Section(
|
||||
(getHeadline().containsString("Conclusion") || getHeadline().containsString("Lethality"))
|
||||
(getHeadline().containsStringIgnoreCase("Conclusion") || anyHeadlineContainsStringIgnoreCase("Lethality"))
|
||||
&& (containsString("LD") || containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose"))
|
||||
&& (
|
||||
containsString("greater than")
|
||||
@ -566,7 +556,7 @@ rule "DOC.12.1: Guideline Deviation in text"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
|
||||
$section: Section(
|
||||
getHeadline().containsString("Introduction")
|
||||
getHeadline().containsStringIgnoreCase("Introduction")
|
||||
&& containsStringIgnoreCase("deviations from the protocol")
|
||||
)
|
||||
then
|
||||
@ -585,11 +575,10 @@ rule "DOC.13.0: Clinical Signs"
|
||||
|| getHeadline().containsString("Macroscopic Findings")
|
||||
)
|
||||
&& !getHeadline().containsString("TABLE")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.13.0", "Clinical Signs found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -615,11 +604,11 @@ rule "DOC.14.0: Dosages"
|
||||
|
||||
rule "DOC.15.0: Mortality"
|
||||
when
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE"))
|
||||
FileAttribute(label == "OECD Number", value == "425")
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.15.0", "Mortality found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -627,12 +616,11 @@ rule "DOC.17.0: Study Conclusion"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
|
||||
$section: Section(
|
||||
getHeadline().containsString("Conclusion")
|
||||
&& hasParagraphs()
|
||||
getHeadline().containsStringIgnoreCase("Conclusion")
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.17.0", "Study Conclusion found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -647,11 +635,10 @@ rule "DOC.18.0: Weight Behavior Changes"
|
||||
|| containsString("bodyweight")
|
||||
|| containsString("bodyweights")
|
||||
)
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.18.0", "Weight behavior changes found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -666,11 +653,10 @@ rule "DOC.19.0: Necropsy findings"
|
||||
)
|
||||
&& !getHeadline().containsStringIgnoreCase("Table")
|
||||
&& !getHeadline().containsStringIgnoreCase("Appendix")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY)
|
||||
.forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a"));
|
||||
.forEach( entity -> entity.applyWithLineBreaks("DOC.19.0", "Necropsy section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -686,11 +672,10 @@ rule "DOC.22.0: Clinical observations"
|
||||
)
|
||||
&& !anyHeadlineContainsStringIgnoreCase("Appendix")
|
||||
&& !anyHeadlineContainsStringIgnoreCase("Table")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.22.0", "Clinical observations section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -743,11 +728,10 @@ rule "DOC.23.0: Bodyweight changes"
|
||||
)
|
||||
&& !getHeadline().containsStringIgnoreCase("Appendix")
|
||||
&& !getHeadline().containsStringIgnoreCase("TABLE")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.23.0", "Bodyweight section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -756,11 +740,10 @@ rule "DOC.24.0: Study Design"
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487"))
|
||||
$section: Section(
|
||||
anyHeadlineContainsStringIgnoreCase("study design")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.24.0", "Study design section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -778,11 +761,10 @@ rule "DOC.25.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
|
||||
&& !getHeadline().containsString("CONCLUSIONS")
|
||||
&& !getHeadline().containsString("Interpretation")
|
||||
&& !getHeadline().containsString("Viability")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_and_conclusion", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.25.0", "Results and Conclusion found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -795,15 +777,10 @@ rule "DOC.26.0: Detailing (404 & 405)"
|
||||
&& !getHeadline().containsStringIgnoreCase("Evaluation")
|
||||
&& !getHeadline().containsStringIgnoreCase("study")
|
||||
&& !getHeadline().containsStringIgnoreCase("discussion")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
var paragraphs = $section.streamAllSubNodesOfType(NodeType.PARAGRAPH).toList();
|
||||
for(var p : paragraphs){
|
||||
entityCreationService.bySemanticNode(p, "detailing", EntityType.ENTITY).ifPresent(entity -> {
|
||||
entity.apply("DOC.26.0", "Detailing found", "n-a");
|
||||
});
|
||||
}
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.26.0", "Detailing found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -813,21 +790,20 @@ rule "DOC.32.0: Preliminary Test Results (429)"
|
||||
$section: Section(
|
||||
((getHeadline().containsString("Preliminary Screening Test") && containsString("Clinical observations"))
|
||||
|| getHeadline().containsString("Pre-Experiment"))
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.32.0", "Preliminary Test Results found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
rule "DOC.33.0: Test Results (429)"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "429")
|
||||
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")) && hasParagraphs())
|
||||
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")))
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.33.0", "Test Results found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -959,11 +935,10 @@ rule "DOC.39.0: Dilution of the test substance"
|
||||
$section: Section(
|
||||
getHeadline().containsString("Formulation")
|
||||
&& containsString("dilution")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.39.0", "Dilution found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -973,21 +948,20 @@ rule "DOC.40.0: Positive Control"
|
||||
$section: Section(
|
||||
getHeadline().containsStringIgnoreCase("Positive Control")
|
||||
&& !(getHeadline().containsStringIgnoreCase("Appendix") || getHeadline().containsStringIgnoreCase("Table"))
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.40.0", "Positive control found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
rule "DOC.42.0: Mortality Statement"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE") && hasParagraphs())
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE"))
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.42.0", "Mortality Statement found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -1038,16 +1012,11 @@ rule "DOC.44.0: Results (Main Study)"
|
||||
$section: Section(
|
||||
getHeadline().containsString("Results")
|
||||
&& getHeadline().getBoundary().length() < 20
|
||||
&& hasParagraphs()
|
||||
&& !(getHeadline().containsString("Appendix") || getHeadline().containsString("Table"))
|
||||
)
|
||||
then
|
||||
var paragraphs = $section.streamAllSubNodesOfType(NodeType.PARAGRAPH).toList();
|
||||
for(var p : paragraphs){
|
||||
entityCreationService.bySemanticNode(p, "results_(main_study)", EntityType.ENTITY).ifPresent(entity -> {
|
||||
entity.apply("DOC.44.0", "Results for main study found.", "n-a");
|
||||
});
|
||||
}
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.44.0", "Results for main study found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -1056,13 +1025,25 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$section: Section(
|
||||
anyHeadlineContainsStringIgnoreCase("study design")
|
||||
&& hasParagraphs()
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a"));
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.45.0", "Doses per bodyweight information found", "n-a"));
|
||||
end
|
||||
|
||||
// This is just an example of the new rules feature.
|
||||
//rule "DOC.99.0"
|
||||
// when
|
||||
// Headline(containsStringIgnoreCase("materials and methods"), $sectionIdentifierMaterials: getSectionIdentifier())
|
||||
// Headline(containsStringIgnoreCase("controls"), getSectionIdentifier().isChildOf($sectionIdentifierMaterials), $sectionIdentifierControls: getSectionIdentifier())
|
||||
// $headline: Headline(containsStringIgnoreCase("positive control substances"), getSectionIdentifier().isChildOf($sectionIdentifierControls))
|
||||
// then
|
||||
// System.out.println($headline);
|
||||
// entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY)
|
||||
// .forEach(entity -> {
|
||||
// entity.applyWithLineBreaks("DOC.99.0", "positive control substance found", "n-a");
|
||||
// });
|
||||
// end
|
||||
|
||||
//------------------------------------ Manual redaction rules ------------------------------------
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user