DM-307: Implemented betweenStringsInclusive #40
@ -60,6 +60,42 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsInclusive(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
|
||||
|
||||
startBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() - start.length());
|
||||
boundary.setEnd(boundary.end() - start.length());
|
||||
});
|
||||
stopBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() + stop.length());
|
||||
boundary.setEnd(boundary.end() + stop.length());
|
||||
});
|
||||
|
||||
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsInclusiveIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
|
||||
|
||||
startBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() - start.length());
|
||||
boundary.setEnd(boundary.end() - start.length());
|
||||
});
|
||||
stopBoundaries.forEach(boundary -> {
|
||||
boundary.setStart(boundary.start() + stop.length());
|
||||
boundary.setEnd(boundary.end() + stop.length());
|
||||
});
|
||||
|
||||
return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
|
||||
|
||||
@ -45,7 +45,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
@Disabled
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/ProblemDocs/15 - EVIDIS - Toxicidade oral aguda.pdf");
|
||||
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
|
||||
@ -578,7 +578,7 @@ rule "DOC.13.0: Clinical Signs"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.13.0", "Clinical Signs found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -590,10 +590,10 @@ rule "DOC.14.0: Dosages"
|
||||
&& !getHeadline().containsString("TABLE")
|
||||
)
|
||||
then
|
||||
entityCreationService.betweenStrings("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
|
||||
entityCreationService.betweenStringsInclusive("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
|
||||
entity.apply("DOC.14.0", "Dosage found", "n-a");
|
||||
});
|
||||
entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
|
||||
entityCreationService.betweenStringsInclusive("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> {
|
||||
entity.apply("DOC.14.0", "Dosage found", "n-a");
|
||||
});
|
||||
entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> {
|
||||
@ -608,7 +608,7 @@ rule "DOC.15.0: Mortality"
|
||||
FileAttribute(label == "OECD Number", value == "425")
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.15.0", "Mortality found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -620,7 +620,7 @@ rule "DOC.17.0: Study Conclusion"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.17.0", "Study Conclusion found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -638,7 +638,7 @@ rule "DOC.18.0: Weight Behavior Changes"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.18.0", "Weight behavior changes found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -656,7 +656,7 @@ rule "DOC.19.0: Necropsy findings"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY)
|
||||
.forEach( entity -> entity.applyWithLineBreaks("DOC.19.0", "Necropsy section found", "n-a"));
|
||||
.forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -675,7 +675,7 @@ rule "DOC.22.0: Clinical observations"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.22.0", "Clinical observations section found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -731,7 +731,7 @@ rule "DOC.23.0: Bodyweight changes"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.23.0", "Bodyweight section found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -743,7 +743,7 @@ rule "DOC.24.0: Study Design"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.24.0", "Study design section found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -764,7 +764,7 @@ rule "DOC.25.0: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_and_conclusion", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.25.0", "Results and Conclusion found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -780,7 +780,7 @@ rule "DOC.26.0: Detailing (404 & 405)"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.26.0", "Detailing found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.26.0", "Detailing found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -793,7 +793,7 @@ rule "DOC.32.0: Preliminary Test Results (429)"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.32.0", "Preliminary Test Results found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -803,7 +803,7 @@ rule "DOC.33.0: Test Results (429)"
|
||||
$section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment")))
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.33.0", "Test Results found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -938,7 +938,7 @@ rule "DOC.39.0: Dilution of the test substance"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.39.0", "Dilution found.", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -951,7 +951,7 @@ rule "DOC.40.0: Positive Control"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.40.0", "Positive control found.", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -961,7 +961,7 @@ rule "DOC.42.0: Mortality Statement"
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE"))
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.42.0", "Mortality Statement found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -1016,7 +1016,7 @@ rule "DOC.44.0: Results (Main Study)"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.44.0", "Results for main study found.", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.44.0", "Results for main study found.", "n-a"));
|
||||
end
|
||||
|
||||
|
||||
@ -1028,7 +1028,7 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY)
|
||||
.forEach(entity -> entity.applyWithLineBreaks("DOC.45.0", "Doses per bodyweight information found", "n-a"));
|
||||
.forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a"));
|
||||
end
|
||||
|
||||
// This is just an example for new rules feature.
|
||||
@ -1041,7 +1041,7 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
|
||||
// System.out.println($headline);
|
||||
// entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY)
|
||||
// .forEach(entity -> {
|
||||
// entity.applyWithLineBreaks("DOC.6.0", "positive control substance found", "n-a");
|
||||
// entity.apply("DOC.6.0", "positive control substance found", "n-a");
|
||||
// });
|
||||
// end
|
||||
|
||||
@ -1114,6 +1114,32 @@ rule "MAN.3.0: Apply image recategorization"
|
||||
|
||||
//------------------------------------ Entity merging rules ------------------------------------
|
||||
|
||||
// Rule unit: X.0
|
||||
// Removes an entity that is contained by another active entity with the same type and entityType.
rule "X.0.0: remove Entity contained by Entity of same type"
|
||||
salience 65
|
||||
when
|
||||
// Any active entity; its type and entityType are bound for the comparison below.
$larger: RedactionEntity($type: type, $entityType: entityType, isActive())
|
||||
// A different active entity of the same type/entityType that is contained by $larger
// and is neither resized nor flagged with skipRemoveEntitiesContainedInLarger.
$contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
then
|
||||
// Mark the contained entity as removed (rule id + reason), then take it out of working memory.
$contained.remove("X.0.0", "remove Entity contained by Entity of same type");
|
||||
retract($contained);
|
||||
end
|
||||
|
||||
|
||||
// Rule unit: X.1
|
||||
// Replaces two intersecting active entities of the same type and entityType with one entity
// built from both via entityCreationService.byEntities.
rule "X.1.0: merge intersecting Entities of same type"
|
||||
salience 64
|
||||
when
|
||||
// First candidate: active, not resized, not flagged with skipRemoveEntitiesContainedInLarger.
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
// Second candidate: a different entity with the same type/entityType and the same flag
// restrictions that intersects the first one.
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
|
||||
then
|
||||
// Build the merged entity from both matches.
// NOTE(review): `document` is not declared in this rule - presumably a global; confirm.
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
|
||||
// Mark both originals as removed (rule id + reason) and take them out of working memory.
$first.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
$second.remove("X.1.0", "merge intersecting Entities of same type");
|
||||
retract($first);
|
||||
retract($second);
|
||||
// Notify the engine about every node the merged entity intersects.
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ File attributes rules ------------------------------------
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user