Merge branch 'DM-305' into 'master'

DM-305: Implemented anyHeadlineContainsString for DocuMine and fixed some DocuMine Rules

Closes DM-305

See merge request redactmanager/redaction-service!15
This commit is contained in:
Dominique Eifländer 2023-06-26 14:07:55 +02:00
commit 78a42ed86a
3 changed files with 43 additions and 34 deletions

View File

@ -8,6 +8,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import java.util.stream.Stream;
import lombok.AccessLevel; import lombok.AccessLevel;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
@ -73,4 +74,16 @@ public class Section implements GenericSemanticNode {
.orElseGet(() -> getParent().getHeadline()); .orElseGet(() -> getParent().getHeadline());
} }
/**
 * Tests the nodes yielded by {@code streamChildrenOfType(NodeType.HEADLINE)} against the given string.
 * Each node is cast to {@link Headline} and queried via {@code containsString(value)}.
 *
 * @param value the string handed to {@code Headline#containsString}
 * @return {@code true} as soon as one headline reports a match, {@code false} if none does
 */
public boolean anyHeadlineContainsString(String value) {

    return streamChildrenOfType(NodeType.HEADLINE)
            .anyMatch(child -> ((Headline) child).containsString(value));
}
/**
 * Tests the nodes yielded by {@code streamChildrenOfType(NodeType.HEADLINE)} against the given string.
 * Each node is cast to {@link Headline} and queried via {@code containsStringIgnoreCase(value)}.
 *
 * @param value the string handed to {@code Headline#containsStringIgnoreCase}
 * @return {@code true} as soon as one headline reports a match, {@code false} if none does
 */
public boolean anyHeadlineContainsStringIgnoreCase(String value) {

    return streamChildrenOfType(NodeType.HEADLINE)
            .anyMatch(child -> ((Headline) child).containsStringIgnoreCase(value));
}
} }

View File

@ -43,7 +43,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@Test @Test
public void titleExtraction() throws IOException { public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf"); AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf");
System.out.println("Start Full integration test"); System.out.println("Start Full integration test");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
System.out.println("Finished structure analysis"); System.out.println("Finished structure analysis");

View File

@ -248,7 +248,7 @@ rule "3: Experimental Completion Date"
// ignore species and strain in irrelevant study types // ignore species and strain in irrelevant study types
rule "4a: Species" rule "4a: Species"
when when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487"))
$section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain"))
then then
$section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> {
@ -304,9 +304,9 @@ rule "5: Strain"
hasEntitiesOfType("species") hasEntitiesOfType("species")
&& hasEntitiesOfType("strain") && hasEntitiesOfType("strain")
&& ( && (
getHeadline().containsString("test system") getHeadline().containsStringIgnoreCase("test system")
|| getHeadline().containsString("animals") || getHeadline().containsStringIgnoreCase("animals")
|| getHeadline().containsString("specification") || getHeadline().containsStringIgnoreCase("specification")
) )
) )
then then
@ -515,7 +515,8 @@ rule "10a: Batch number"
when when
$section: Section( $section: Section(
( (
getHeadline().containsString("Test and Control Substances") anyHeadlineContainsString("Test Substance")
|| getHeadline().containsString("Test and Control Substances")
|| getHeadline().containsString("Test Substances") || getHeadline().containsString("Test Substances")
|| getHeadline().containsString("Test Substance") || getHeadline().containsString("Test Substance")
|| getHeadline().containsString("Test Item") || getHeadline().containsString("Test Item")
@ -723,7 +724,6 @@ rule "15: Mortality"
FileAttribute(label == "OECD Number", value == "425") FileAttribute(label == "OECD Number", value == "425")
then then
//FIXME
var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent()); var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent());
entity.setRedactionReason("Mortality found"); entity.setRedactionReason("Mortality found");
entity.setLegalBasis("n-a"); entity.setLegalBasis("n-a");
@ -740,18 +740,17 @@ rule "17: Study Conclusion"
getHeadline().containsString("Conclusion") getHeadline().containsString("Conclusion")
) )
then then
entityCreationService.bySemanticNode($section, "study_conclusion", EntityType.ENTITY).ifPresent(entity -> { var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section);
entity.setRedactionReason("Study Conclusion found"); entity.setRedactionReason("Study Conclusion found");
entity.setLegalBasis("n-a"); entity.setLegalBasis("n-a");
entity.setRedaction(true); entity.setRedaction(true);
entity.addMatchedRule("17"); entity.addMatchedRule("17");
});
end end
rule "18: Weight Behavior Changes" rule "18: Weight Behavior Changes"
when when
FileAttribute(label == "OECD Number", value == "425") FileAttribute(label == "OECD Number", value == "402")
$section: Section( $section: Section(
getHeadline().containsString("Results") getHeadline().containsString("Results")
&& ( && (
@ -762,14 +761,11 @@ rule "18: Weight Behavior Changes"
) )
) )
then then
var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section);
//FIXME
entityCreationService.bySemanticNode($section, "weight_behavior_changes", EntityType.ENTITY).ifPresent(entity -> {
entity.setRedactionReason("Weight behavior changes found"); entity.setRedactionReason("Weight behavior changes found");
entity.setLegalBasis("n-a"); entity.setLegalBasis("n-a");
entity.setRedaction(true); entity.setRedaction(true);
entity.addMatchedRule("18"); entity.addMatchedRule("18");
});
end end
rule "19: Necropsy findings" rule "19: Necropsy findings"
@ -1181,17 +1177,17 @@ rule "40: Positive Control"
end end
rule "42: Mortality Statement" rule "42: Mortality Statement"
when when
FileAttribute(label == "OECD Number", value == "402") FileAttribute(label == "OECD Number", value == "402")
$section: Section( getHeadline().containsString("Mortality") && !getHeadline().containsString("TABLE")) $headline: Headline(containsString("Mortality") && !containsString("TABLE"))
then then
entityCreationService.bySemanticNode($section, "mortality_statement", EntityType.ENTITY).ifPresent(entity -> { var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent());
entity.setRedactionReason("Mortality Statement found"); entity.setRedactionReason("Mortality Statement found");
entity.setLegalBasis("n-a"); entity.setLegalBasis("n-a");
entity.setRedaction(true); entity.setRedaction(true);
entity.addMatchedRule("42"); entity.addMatchedRule("42");
});
end end