DM-305: Implemented anyHeadlineContainsString for DocuMine and fixed some DocuMine Rules
This commit is contained in:
parent
04656acd9d
commit
3447ee1856
@ -8,6 +8,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -73,4 +74,16 @@ public class Section implements GenericSemanticNode {
|
||||
.orElseGet(() -> getParent().getHeadline());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reports whether at least one headline of this node contains the given string.
 *
 * <p>Takes the nodes yielded by {@code streamChildrenOfType(NodeType.HEADLINE)}, casts each
 * to {@code Headline} and delegates the actual text check to {@code Headline.containsString}.
 * NOTE(review): presumably this check is case-sensitive, given the separate
 * {@code containsStringIgnoreCase} on Headline - confirm against the Headline implementation.
 *
 * @param value the string handed unchanged to each headline's {@code containsString}
 * @return {@code true} as soon as one headline reports a match; {@code false} if none does
 */
public boolean anyHeadlineContainsString(String value){
|
||||
return streamChildrenOfType(NodeType.HEADLINE)//
|
||||
.map(node -> (Headline) node).anyMatch(h -> h.containsString(value));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reports whether at least one headline of this node contains the given string,
 * using the headline's ignore-case comparison.
 *
 * <p>Takes the nodes yielded by {@code streamChildrenOfType(NodeType.HEADLINE)}, casts each
 * to {@code Headline} and delegates the actual text check to
 * {@code Headline.containsStringIgnoreCase}.
 *
 * @param value the string handed unchanged to each headline's {@code containsStringIgnoreCase}
 * @return {@code true} as soon as one headline reports a match; {@code false} if none does
 */
public boolean anyHeadlineContainsStringIgnoreCase(String value){
|
||||
return streamChildrenOfType(NodeType.HEADLINE)//
|
||||
.map(node -> (Headline) node).anyMatch(h -> h.containsStringIgnoreCase(value));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -43,7 +43,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf");
|
||||
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf");
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
System.out.println("Finished structure analysis");
|
||||
|
||||
@ -248,7 +248,7 @@ rule "3: Experimental Completion Date"
|
||||
// ignore species and strain in irrelevant study types
|
||||
rule "4a: Species"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487"))
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487"))
|
||||
$section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain"))
|
||||
then
|
||||
$section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> {
|
||||
@ -304,9 +304,9 @@ rule "5: Strain"
|
||||
hasEntitiesOfType("species")
|
||||
&& hasEntitiesOfType("strain")
|
||||
&& (
|
||||
getHeadline().containsString("test system")
|
||||
|| getHeadline().containsString("animals")
|
||||
|| getHeadline().containsString("specification")
|
||||
getHeadline().containsStringIgnoreCase("test system")
|
||||
|| getHeadline().containsStringIgnoreCase("animals")
|
||||
|| getHeadline().containsStringIgnoreCase("specification")
|
||||
)
|
||||
)
|
||||
then
|
||||
@ -515,7 +515,8 @@ rule "10a: Batch number"
|
||||
when
|
||||
$section: Section(
|
||||
(
|
||||
getHeadline().containsString("Test and Control Substances")
|
||||
anyHeadlineContainsString("Test Substance")
|
||||
|| getHeadline().containsString("Test and Control Substances")
|
||||
|| getHeadline().containsString("Test Substances")
|
||||
|| getHeadline().containsString("Test Substance")
|
||||
|| getHeadline().containsString("Test Item")
|
||||
@ -723,7 +724,6 @@ rule "15: Mortality"
|
||||
FileAttribute(label == "OECD Number", value == "425")
|
||||
then
|
||||
|
||||
//FIXME
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent());
|
||||
entity.setRedactionReason("Mortality found");
|
||||
entity.setLegalBasis("n-a");
|
||||
@ -735,23 +735,22 @@ rule "15: Mortality"
|
||||
|
||||
rule "17: Study Conclusion"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
|
||||
$section: Section(
|
||||
getHeadline().containsString("Conclusion")
|
||||
)
|
||||
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
|
||||
$section: Section(
|
||||
getHeadline().containsString("Conclusion")
|
||||
)
|
||||
then
|
||||
entityCreationService.bySemanticNode($section, "study_conclusion", EntityType.ENTITY).ifPresent(entity -> {
|
||||
entity.setRedactionReason("Study Conclusion found");
|
||||
entity.setLegalBasis("n-a");
|
||||
entity.setRedaction(true);
|
||||
entity.addMatchedRule("17");
|
||||
});
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section);
|
||||
entity.setRedactionReason("Study Conclusion found");
|
||||
entity.setLegalBasis("n-a");
|
||||
entity.setRedaction(true);
|
||||
entity.addMatchedRule("17");
|
||||
end
|
||||
|
||||
|
||||
rule "18: Weight Behavior Changes"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "425")
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$section: Section(
|
||||
getHeadline().containsString("Results")
|
||||
&& (
|
||||
@ -762,14 +761,11 @@ rule "18: Weight Behavior Changes"
|
||||
)
|
||||
)
|
||||
then
|
||||
|
||||
//FIXME
|
||||
entityCreationService.bySemanticNode($section, "weight_behavior_changes", EntityType.ENTITY).ifPresent(entity -> {
|
||||
entity.setRedactionReason("Weight behavior changes found");
|
||||
entity.setLegalBasis("n-a");
|
||||
entity.setRedaction(true);
|
||||
entity.addMatchedRule("18");
|
||||
});
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section);
|
||||
entity.setRedactionReason("Weight behavior changes found");
|
||||
entity.setLegalBasis("n-a");
|
||||
entity.setRedaction(true);
|
||||
entity.addMatchedRule("18");
|
||||
end
|
||||
|
||||
rule "19: Necropsy findings"
|
||||
@ -1181,17 +1177,17 @@ rule "40: Positive Control"
|
||||
end
|
||||
|
||||
|
||||
|
||||
rule "42: Mortality Statement"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$section: Section( getHeadline().containsString("Mortality") && !getHeadline().containsString("TABLE"))
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$headline: Headline(containsString("Mortality") && !containsString("TABLE"))
|
||||
then
|
||||
entityCreationService.bySemanticNode($section, "mortality_statement", EntityType.ENTITY).ifPresent(entity -> {
|
||||
entity.setRedactionReason("Mortality Statement found");
|
||||
entity.setLegalBasis("n-a");
|
||||
entity.setRedaction(true);
|
||||
entity.addMatchedRule("42");
|
||||
});
|
||||
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent());
|
||||
entity.setRedactionReason("Mortality Statement found");
|
||||
entity.setLegalBasis("n-a");
|
||||
entity.setRedaction(true);
|
||||
entity.addMatchedRule("42");
|
||||
end
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user