DM-307: Implemented betweenStringsInclusive #40

Merged
dominique.eiflaender1 merged 1 commit from DM-307 into master 2023-07-07 14:53:41 +02:00
4 changed files with 83 additions and 21 deletions

View File

@ -60,6 +60,42 @@ public class EntityCreationService {
}
/**
 * Creates entities between occurrences of {@code start} and {@code stop} in the text block of
 * {@code node}, using case-sensitive matching.
 *
 * <p>Every boundary found for {@code start} is moved backwards by {@code start.length()} and every
 * boundary found for {@code stop} is moved forwards by {@code stop.length()} before the lists are
 * handed to {@code betweenBoundaries}. NOTE(review): presumably this widens the span so that the
 * delimiters themselves become part of the entity; confirm against {@code betweenBoundaries}.
 *
 * @param start      string whose occurrences open a span
 * @param stop       string whose occurrences close a span
 * @param type       entity type name passed through to {@code betweenBoundaries}
 * @param entityType entity type passed through to {@code betweenBoundaries}
 * @param node       semantic node whose text block is searched
 * @return the stream produced by {@code betweenBoundaries} for the shifted boundaries
 */
public Stream<RedactionEntity> betweenStringsInclusive(String start, String stop, String type, EntityType entityType, SemanticNode node) {

    List<Boundary> openingBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
    List<Boundary> closingBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());

    // Move each opening boundary back by the length of the start string.
    for (Boundary opening : openingBoundaries) {
        opening.setStart(opening.start() - start.length());
        opening.setEnd(opening.end() - start.length());
    }

    // Move each closing boundary forward by the length of the stop string.
    for (Boundary closing : closingBoundaries) {
        closing.setStart(closing.start() + stop.length());
        closing.setEnd(closing.end() + stop.length());
    }

    return betweenBoundaries(openingBoundaries, closingBoundaries, type, entityType, node);
}
/**
 * Creates entities between occurrences of {@code start} and {@code stop} in the text block of
 * {@code node}, locating both strings with the ignore-case search.
 *
 * <p>Every boundary found for {@code start} is moved backwards by {@code start.length()} and every
 * boundary found for {@code stop} is moved forwards by {@code stop.length()} before the lists are
 * handed to {@code betweenBoundaries}. NOTE(review): presumably this widens the span so that the
 * delimiters themselves become part of the entity; confirm against {@code betweenBoundaries}.
 *
 * @param start      string whose occurrences open a span
 * @param stop       string whose occurrences close a span
 * @param type       entity type name passed through to {@code betweenBoundaries}
 * @param entityType entity type passed through to {@code betweenBoundaries}
 * @param node       semantic node whose text block is searched
 * @return the stream produced by {@code betweenBoundaries} for the shifted boundaries
 */
public Stream<RedactionEntity> betweenStringsInclusiveIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {

    List<Boundary> openingBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
    List<Boundary> closingBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());

    // Move each opening boundary back by the length of the start string.
    for (Boundary opening : openingBoundaries) {
        opening.setStart(opening.start() - start.length());
        opening.setEnd(opening.end() - start.length());
    }

    // Move each closing boundary forward by the length of the stop string.
    for (Boundary closing : closingBoundaries) {
        closing.setStart(closing.start() + stop.length());
        closing.setEnd(closing.end() + stop.length());
    }

    return betweenBoundaries(openingBoundaries, closingBoundaries, type, entityType, node);
}
public Stream<RedactionEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());

View File

@ -45,7 +45,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@Disabled
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf");
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/ProblemDocs/15 - EVIDIS - Toxicidade oral aguda.pdf");
System.out.println("Start Full integration test");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));

View File

@ -578,7 +578,7 @@ rule "DOC.13.0: Clinical Signs"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.13.0", "Clinical Signs found", "n-a"));
.forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a"));
end
@ -590,10 +590,10 @@ rule "DOC.14.0: Dosages"
&& !getHeadline().containsString("TABLE")
)
then
entityCreationService.betweenStrings("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
entityCreationService.betweenStringsInclusive("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
entity.apply("DOC.14.0", "Dosage found", "n-a");
});
entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
entityCreationService.betweenStringsInclusive("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
entity.apply("DOC.14.0", "Dosage found", "n-a");
});
entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> {
@ -608,7 +608,7 @@ rule "DOC.15.0: Mortality"
FileAttribute(label == "OECD Number", value == "425")
then
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.15.0", "Mortality found", "n-a"));
.forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a"));
end
@ -620,7 +620,7 @@ rule "DOC.17.0: Study Conclusion"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.17.0", "Study Conclusion found", "n-a"));
.forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a"));
end
@ -638,7 +638,7 @@ rule "DOC.18.0: Weight Behavior Changes"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.18.0", "Weight behavior changes found", "n-a"));
.forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a"));
end
@ -656,7 +656,7 @@ rule "DOC.19.0: Necropsy findings"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY)
.forEach( entity -> entity.applyWithLineBreaks("DOC.19.0", "Necropsy section found", "n-a"));
.forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a"));
end
@ -675,7 +675,7 @@ rule "DOC.22.0: Clinical observations"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.22.0", "Clinical observations section found", "n-a"));
.forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a"));
end
@ -731,7 +731,7 @@ rule "DOC.23.0: Bodyweight changes"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.23.0", "Bodyweight section found", "n-a"));
.forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a"));
end
@ -743,7 +743,7 @@ rule "DOC.24.0: Study Design"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.24.0", "Study design section found", "n-a"));
.forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a"));
end
@ -764,7 +764,7 @@ rule "DOC.25.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_and_conclusion", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.25.0", "Results and Conclusion found", "n-a"));
.forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a"));
end
@ -780,7 +780,7 @@ rule "DOC.26.0: Detailing (404 & 405)"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.26.0", "Detailing found", "n-a"));
.forEach(entity -> entity.apply("DOC.26.0", "Detailing found", "n-a"));
end
@ -793,7 +793,7 @@ rule "DOC.32.0: Preliminary Test Results (429)"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.32.0", "Preliminary Test Results found", "n-a"));
.forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a"));
end
@ -803,7 +803,7 @@ rule "DOC.33.0: Test Results (429)"
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")))
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.33.0", "Test Results found", "n-a"));
.forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a"));
end
@ -938,7 +938,7 @@ rule "DOC.39.0: Dilution of the test substance"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.39.0", "Dilution found.", "n-a"));
.forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", "n-a"));
end
@ -951,7 +951,7 @@ rule "DOC.40.0: Positive Control"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.40.0", "Positive control found.", "n-a"));
.forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a"));
end
@ -961,7 +961,7 @@ rule "DOC.42.0: Mortality Statement"
$headline: Headline(containsString("Mortality") && !containsString("TABLE"))
then
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.42.0", "Mortality Statement found", "n-a"));
.forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a"));
end
@ -1016,7 +1016,7 @@ rule "DOC.44.0: Results (Main Study)"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.44.0", "Results for main study found.", "n-a"));
.forEach(entity -> entity.apply("DOC.44.0", "Results for main study found.", "n-a"));
end
@ -1028,7 +1028,7 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
)
then
entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY)
.forEach(entity -> entity.applyWithLineBreaks("DOC.45.0", "Doses per bodyweight information found", "n-a"));
.forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a"));
end
// This is just an example for new rules feature.
@ -1041,7 +1041,7 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
// System.out.println($headline);
// entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY)
// .forEach(entity -> {
// entity.applyWithLineBreaks("DOC.6.0", "positive control substance found", "n-a");
// entity.apply("DOC.6.0", "positive control substance found", "n-a");
// });
// end
@ -1114,6 +1114,32 @@ rule "MAN.3.0: Apply image recategorization"
//------------------------------------ Entity merging rules ------------------------------------
// Rule unit: X.0
// Drops an active entity when it is contained by another active entity that has the same
// type and the same entityType. The contained entity must not be flagged as resized and must
// not have skipRemoveEntitiesContainedInLarger set; no such restriction is placed on the
// containing entity.
rule "X.0.0: remove Entity contained by Entity of same type"
// Salience 65 is one higher than the salience (64) of the merge rule X.1.0 below.
salience 65
when
// Candidate containing entity; binds its type and entityType for the comparison below.
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
// A different entity (this != $larger) with matching type/entityType that $larger contains.
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
then
// Record the removal on the entity with this rule's identifier and reason ...
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
// ... and take the fact out of working memory.
retract($contained);
end
// Rule unit: X.1
// Replaces two active, intersecting entities of the same type and entityType with a single
// entity built from both. Neither entity may be flagged as resized or have
// skipRemoveEntitiesContainedInLarger set.
rule "X.1.0: merge intersecting Entities of same type"
// Salience 64 is one lower than the salience (65) of the containment rule X.0.0 above.
salience 64
when
// First entity; binds its type and entityType for the comparison below.
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
// A different entity (this != $first) with matching type/entityType that intersects $first.
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
then
// Build the merged entity from both originals on the document.
// NOTE(review): the merged entity is not inserted explicitly here; presumably byEntities
// takes care of registering it. Confirm against EntityCreationService.
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
// Record the removal on both originals with this rule's identifier and reason.
$first.remove("X.1.0", "merge intersecting Entities of same type");
$second.remove("X.1.0", "merge intersecting Entities of same type");
// Take both original facts out of working memory.
retract($first);
retract($second);
// Notify the engine that every node intersected by the merged entity has changed.
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
end
//------------------------------------ File attributes rules ------------------------------------