diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java
index ef0df4c2..1031e74e 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java
@@ -60,6 +60,66 @@ public class EntityCreationService {
     }
 
 
+    /**
+     * Finds {@code start} and {@code stop} via {@code findBoundariesByString}, shifts every start
+     * match back by {@code start.length()} and every stop match forward by {@code stop.length()},
+     * and delegates to {@code betweenBoundaries}. Presumably this makes both delimiters part of
+     * the resulting entities (hence "inclusive") - TODO confirm against betweenBoundaries.
+     *
+     * @param start      opening delimiter to search for in the node's text block
+     * @param stop       closing delimiter to search for in the node's text block
+     * @param type       passed through unchanged to betweenBoundaries
+     * @param entityType passed through unchanged to betweenBoundaries
+     * @param node       node whose text block is searched
+     * @return the stream returned by betweenBoundaries for the shifted boundaries
+     */
+    public Stream<RedactionEntity> betweenStringsInclusive(String start, String stop, String type, EntityType entityType, SemanticNode node) {
+
+        List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByString(start, node.getTextBlock());
+        List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByString(stop, node.getTextBlock());
+
+        return betweenShiftedBoundaries(startBoundaries, start.length(), stopBoundaries, stop.length(), type, entityType, node);
+    }
+
+
+    /**
+     * Case-insensitive counterpart of {@code betweenStringsInclusive}: identical except that the
+     * delimiters are located with {@code findBoundariesByStringIgnoreCase}.
+     */
+    public Stream<RedactionEntity> betweenStringsInclusiveIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
+
+        List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
+        List<Boundary> stopBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(stop, node.getTextBlock());
+
+        return betweenShiftedBoundaries(startBoundaries, start.length(), stopBoundaries, stop.length(), type, entityType, node);
+    }
+
+
+    /**
+     * Shared tail of the inclusive searches: shifts the start boundaries back and the stop
+     * boundaries forward by the given lengths, then delegates to betweenBoundaries.
+     */
+    private Stream<RedactionEntity> betweenShiftedBoundaries(List<Boundary> startBoundaries, int startLength, List<Boundary> stopBoundaries, int stopLength, String type, EntityType entityType, SemanticNode node) {
+
+        // NOTE(review): a start match closer to offset 0 than startLength ends up with a negative
+        // start here - confirm Boundary and betweenBoundaries tolerate that.
+        shiftBoundaries(startBoundaries, -startLength);
+        shiftBoundaries(stopBoundaries, stopLength);
+
+        return betweenBoundaries(startBoundaries, stopBoundaries, type, entityType, node);
+    }
+
+
+    /** Moves every boundary in the list by {@code offset} (negative = towards lower offsets), mutating it in place. */
+    private static void shiftBoundaries(List<Boundary> boundaries, int offset) {
+
+        boundaries.forEach(boundary -> {
+            boundary.setStart(boundary.start() + offset);
+            boundary.setEnd(boundary.end() + offset);
+        });
+    }
+
+
     public Stream<RedactionEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
 
         List<Boundary> startBoundaries = RedactionSearchUtility.findBoundariesByStringIgnoreCase(start, node.getTextBlock());
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java
index c6549313..a610c0b8 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java
@@ -45,7 +45,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
     @Disabled
     public void titleExtraction() throws IOException {
 
-        AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf");
+        AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/ProblemDocs/15 - EVIDIS - Toxicidade oral aguda.pdf");
 
         System.out.println("Start Full integration test");
         analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
index 7270830c..7eae3ece 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
+++
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -578,7 +578,7 @@ rule "DOC.13.0: Clinical Signs" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.13.0", "Clinical Signs found", "n-a")); + .forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a")); end @@ -590,10 +590,10 @@ rule "DOC.14.0: Dosages" && !getHeadline().containsString("TABLE") ) then - entityCreationService.betweenStrings("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entityCreationService.betweenStringsInclusive("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { entity.apply("DOC.14.0", "Dosage found", "n-a"); }); - entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entityCreationService.betweenStringsInclusive("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { entity.apply("DOC.14.0", "Dosage found", "n-a"); }); entityCreationService.byRegexWithLineBreaks("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])", "dosages", EntityType.ENTITY,1, $section).forEach(entity -> { @@ -608,7 +608,7 @@ rule "DOC.15.0: Mortality" FileAttribute(label == "OECD Number", value == "425") then entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.15.0", "Mortality found", "n-a")); + .forEach(entity -> entity.apply("DOC.15.0", "Mortality found", "n-a")); end @@ -620,7 +620,7 @@ rule "DOC.17.0: Study Conclusion" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "study_conclusion", EntityType.ENTITY) - .forEach(entity -> 
entity.applyWithLineBreaks("DOC.17.0", "Study Conclusion found", "n-a")); + .forEach(entity -> entity.apply("DOC.17.0", "Study Conclusion found", "n-a")); end @@ -638,7 +638,7 @@ rule "DOC.18.0: Weight Behavior Changes" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "weight_behavior_changes", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.18.0", "Weight behavior changes found", "n-a")); + .forEach(entity -> entity.apply("DOC.18.0", "Weight behavior changes found", "n-a")); end @@ -656,7 +656,7 @@ rule "DOC.19.0: Necropsy findings" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "necropsy_findings", EntityType.ENTITY) - .forEach( entity -> entity.applyWithLineBreaks("DOC.19.0", "Necropsy section found", "n-a")); + .forEach( entity -> entity.apply("DOC.19.0", "Necropsy section found", "n-a")); end @@ -675,7 +675,7 @@ rule "DOC.22.0: Clinical observations" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_observations", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.22.0", "Clinical observations section found", "n-a")); + .forEach(entity -> entity.apply("DOC.22.0", "Clinical observations section found", "n-a")); end @@ -731,7 +731,7 @@ rule "DOC.23.0: Bodyweight changes" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.23.0", "Bodyweight section found", "n-a")); + .forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a")); end @@ -743,7 +743,7 @@ rule "DOC.24.0: Study Design" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "study_design", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.24.0", "Study design section found", "n-a")); + .forEach(entity -> entity.apply("DOC.24.0", "Study design section found", "n-a")); end @@ -764,7 +764,7 @@ rule "DOC.25.0: Results and Conclusion 
(406, 428, 438, 439, 474 & 487)" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "results_and_conclusion", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.25.0", "Results and Conclusion found", "n-a")); + .forEach(entity -> entity.apply("DOC.25.0", "Results and Conclusion found", "n-a")); end @@ -780,7 +780,7 @@ rule "DOC.26.0: Detailing (404 & 405)" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "detailing", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.26.0", "Detailing found", "n-a")); + .forEach(entity -> entity.apply("DOC.26.0", "Detailing found", "n-a")); end @@ -793,7 +793,7 @@ rule "DOC.32.0: Preliminary Test Results (429)" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "preliminary_test_results", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.32.0", "Preliminary Test Results found", "n-a")); + .forEach(entity -> entity.apply("DOC.32.0", "Preliminary Test Results found", "n-a")); end @@ -803,7 +803,7 @@ rule "DOC.33.0: Test Results (429)" $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))) then entityCreationService.bySemanticNodeParagraphsOnly($section, "test_results", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.33.0", "Test Results found", "n-a")); + .forEach(entity -> entity.apply("DOC.33.0", "Test Results found", "n-a")); end @@ -938,7 +938,7 @@ rule "DOC.39.0: Dilution of the test substance" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "dilution", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.39.0", "Dilution found.", "n-a")); + .forEach(entity -> entity.apply("DOC.39.0", "Dilution found.", "n-a")); end @@ -951,7 +951,7 @@ rule "DOC.40.0: Positive Control" ) 
then entityCreationService.bySemanticNodeParagraphsOnly($section, "positive_control", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.40.0", "Positive control found.", "n-a")); + .forEach(entity -> entity.apply("DOC.40.0", "Positive control found.", "n-a")); end @@ -961,7 +961,7 @@ rule "DOC.42.0: Mortality Statement" $headline: Headline(containsString("Mortality") && !containsString("TABLE")) then entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "mortality_statement", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.42.0", "Mortality Statement found", "n-a")); + .forEach(entity -> entity.apply("DOC.42.0", "Mortality Statement found", "n-a")); end @@ -1016,7 +1016,7 @@ rule "DOC.44.0: Results (Main Study)" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "results_(main_study)", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.44.0", "Results for main study found.", "n-a")); + .forEach(entity -> entity.apply("DOC.44.0", "Results for main study found.", "n-a")); end @@ -1028,7 +1028,7 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)" ) then entityCreationService.bySemanticNodeParagraphsOnly($section, "doses_(mg_kg_bw)", EntityType.ENTITY) - .forEach(entity -> entity.applyWithLineBreaks("DOC.45.0", "Doses per bodyweight information found", "n-a")); + .forEach(entity -> entity.apply("DOC.45.0", "Doses per bodyweight information found", "n-a")); end // This is just an example for new rules feature. 
@@ -1041,7 +1041,7 @@ rule "DOC.45.0: Doses (mg/kg bodyweight)"
 //        System.out.println($headline);
 //        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "irgendwas", EntityType.ENTITY)
 //            .forEach(entity -> {
-//                entity.applyWithLineBreaks("DOC.6.0", "positive control substance found", "n-a");
+//                entity.apply("DOC.6.0", "positive control substance found", "n-a");
 //            });
 //    end
 
@@ -1114,6 +1114,32 @@ rule "MAN.3.0: Apply image recategorization"
 
 //------------------------------------ Entity merging rules ------------------------------------
 
+// Rule unit: X.0 - removes an active entity that is containedBy another active entity of the same type and entityType
+rule "X.0.0: remove Entity contained by Entity of same type"
+    salience 65 // one above X.1.0 (64): Drools gives higher salience priority, so containment removal is scheduled before merging
+    when
+        $larger: RedactionEntity($type: type, $entityType: entityType, isActive()) // binds type/entityType for the second pattern
+        $contained: RedactionEntity(containedBy($larger), type == $type, entityType == $entityType, this != $larger, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) // entities flagged resized or skipRemoveEntitiesContainedInLarger are never matched as $contained
+    then
+        $contained.remove("X.0.0", "remove Entity contained by Entity of same type"); // records rule id and reason on the entity
+        retract($contained); // take the removed entity out of working memory
+    end
+
+
+// Rule unit: X.1 - replaces two intersecting active entities of the same type and entityType by the result of byEntities
+rule "X.1.0: merge intersecting Entities of same type"
+    salience 64 // one below X.0.0 (65)
+    when
+        $first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive()) // unlike X.0.0, both sides must be unflagged
+        $second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
+    then
+        RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document); // NOTE(review): mergedEntity is not inserted here - presumably byEntities registers it; confirm
+        $first.remove("X.1.0", "merge intersecting Entities of same type");
+        $second.remove("X.1.0", "merge intersecting Entities of same type");
+        retract($first);
+        retract($second);
+        mergedEntity.getIntersectingNodes().forEach(node -> update(node)); // tell the engine the nodes returned by getIntersectingNodes() changed
+    end
 
 //------------------------------------ File attributes rules ------------------------------------
 
diff --git
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/ProblemDocs/15 - EVIDIS - Toxicidade oral aguda.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/ProblemDocs/15 - EVIDIS - Toxicidade oral aguda.pdf new file mode 100644 index 00000000..3d8bb7b1 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/ProblemDocs/15 - EVIDIS - Toxicidade oral aguda.pdf differ