diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java index e2a0ddae..17d5b16c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/nodes/Section.java @@ -8,6 +8,7 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Do import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector; +import java.util.stream.Stream; import lombok.AccessLevel; import lombok.AllArgsConstructor; @@ -73,4 +74,16 @@ public class Section implements GenericSemanticNode { .orElseGet(() -> getParent().getHeadline()); } + + public boolean anyHeadlineContainsString(String value){ + return streamChildrenOfType(NodeType.HEADLINE)// + .map(node -> (Headline) node).anyMatch(h -> h.containsString(value)); + } + + + public boolean anyHeadlineContainsStringIgnoreCase(String value){ + return streamChildrenOfType(NodeType.HEADLINE)// + .map(node -> (Headline) node).anyMatch(h -> h.containsStringIgnoreCase(value)); + } + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index 66a80836..1e32b39b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -43,7 +43,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { @Test public void titleExtraction() throws IOException { - AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf"); System.out.println("Start Full integration test"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); System.out.println("Finished structure analysis"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index ef0d3980..5ca8ff74 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -248,7 +248,7 @@ rule "3: Experimental Completion Date" // ignore species and strain in irrelevant study types rule "4a: Species" when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","471","474","487")) $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) then $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { @@ -304,9 +304,9 @@ rule "5: Strain" hasEntitiesOfType("species") && hasEntitiesOfType("strain") && ( - getHeadline().containsString("test system") - || getHeadline().containsString("animals") - || getHeadline().containsString("specification") + getHeadline().containsStringIgnoreCase("test system") + || getHeadline().containsStringIgnoreCase("animals") + || getHeadline().containsStringIgnoreCase("specification") ) ) then @@ -515,7 +515,8 @@ rule "10a: Batch number" when $section: Section( ( - getHeadline().containsString("Test and Control Substances") + anyHeadlineContainsString("Test Substance") + || getHeadline().containsString("Test and Control Substances") || getHeadline().containsString("Test Substances") || getHeadline().containsString("Test Substance") || getHeadline().containsString("Test Item") @@ -723,7 +724,6 @@ rule "15: Mortality" FileAttribute(label == "OECD Number", value == "425") then - //FIXME var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent()); entity.setRedactionReason("Mortality found"); entity.setLegalBasis("n-a"); @@ -735,23 +735,22 @@ rule "15: Mortality" rule "17: Study Conclusion" when - FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) - $section: Section( - getHeadline().containsString("Conclusion") - ) + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + getHeadline().containsString("Conclusion") + ) then - entityCreationService.bySemanticNode($section, "study_conclusion", EntityType.ENTITY).ifPresent(entity -> { - entity.setRedactionReason("Study Conclusion found"); - entity.setLegalBasis("n-a"); - entity.setRedaction(true); - entity.addMatchedRule("17"); - }); + var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "study_conclusion", EntityType.ENTITY, $section); + entity.setRedactionReason("Study Conclusion found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("17"); end rule "18: Weight Behavior Changes" when - FileAttribute(label == "OECD Number", value == "425") + FileAttribute(label == "OECD Number", value == "402") $section: Section( getHeadline().containsString("Results") && ( @@ -762,14 +761,11 @@ rule "18: Weight Behavior Changes" ) ) then - - //FIXME - entityCreationService.bySemanticNode($section, "weight_behavior_changes", EntityType.ENTITY).ifPresent(entity -> { - entity.setRedactionReason("Weight behavior changes found"); - entity.setLegalBasis("n-a"); - entity.setRedaction(true); - entity.addMatchedRule("18"); - }); + var entity = entityCreationService.byBoundary(Boundary.merge($section.streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "weight_behavior_changes", EntityType.ENTITY, $section); + entity.setRedactionReason("Weight behavior changes found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("18"); end rule "19: Necropsy findings" @@ -1181,17 +1177,17 @@ rule "40: Positive Control" end + rule "42: Mortality Statement" when - FileAttribute(label == "OECD Number", value == "402") - $section: Section( getHeadline().containsString("Mortality") && !getHeadline().containsString("TABLE")) + FileAttribute(label == "OECD Number", value == "402") + $headline: Headline(containsString("Mortality") && !containsString("TABLE")) then - entityCreationService.bySemanticNode($section, "mortality_statement", EntityType.ENTITY).ifPresent(entity -> { - entity.setRedactionReason("Mortality Statement found"); - entity.setLegalBasis("n-a"); - entity.setRedaction(true); - entity.addMatchedRule("42"); - }); + var entity = entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality_statement", EntityType.ENTITY, $headline.getParent()); + entity.setRedactionReason("Mortality Statement found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("42"); end