RED-7679: WIP: Added Table Demo rules according to given examples in pdf

2023-10-25 09:12:10 +02:00 · 2023-10-25 09:12:10 +02:00 · 1b6d8d31c4
commit 1b6d8d31c4
parent 0c7d39ff0c
3 changed files with 65 additions and 29 deletions
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java
@ -424,31 +424,67 @@ public interface SemanticNode {
        return Arrays.stream(strings).allMatch(this::containsStringIgnoreCase);
    }

+
+    /**
+     * Checks whether this SemanticNode contains exactly the provided String as a word.
+     * @param word - the word to look for; compared case-sensitively against each word of the TextBlock
+     * @return true, if at least one word of this node's TextBlock equals the given word
+     */
    default boolean containsWord(String word) {

        // Keep only the words that equal the argument exactly (case-sensitive); one hit is enough.
        return getTextBlock().getWords()
                .stream()
                .filter(candidate -> candidate.equals(word))
                .findAny()
                .isPresent();
    }

+
+    /**
+     * Checks whether this SemanticNode contains exactly the provided String as a word ignoring case.
+     * @param word - the word to look for; compared against each word of the TextBlock, ignoring case
+     * @return true, if at least one word of this node's TextBlock equals the given word, ignoring case
+     */
    default boolean containsWordIgnoreCase(String word) {

        // A null word cannot equal any word of the text block.
        if (word == null) {
            return false;
        }

        // Fold both sides with the same fixed locale: mixing the JVM default locale with
        // Locale.ENGLISH can produce different lower-case forms (e.g. 'I' under a Turkish default).
        // The needle is folded once instead of once per compared word.
        String needle = word.toLowerCase(Locale.ENGLISH);
        return getTextBlock().getWords().stream().map(s -> s.toLowerCase(Locale.ENGLISH)).anyMatch(needle::equals);
    }

+
+    /**
+     * Checks whether this SemanticNode contains any of the provided Strings as a word.
+     * @param words - one or more Strings (varargs) which the TextBlock might contain as a word
+     * @return true, if at least one of the provided Strings equals a word of this node's TextBlock
+     */
    default boolean containsAnyWord(String... words) {

        // Probe the text block once per candidate and stop at the first exact (case-sensitive) hit.
        for (String candidate : words) {
            if (getTextBlock().getWords().stream().anyMatch(candidate::equals)) {
                return true;
            }
        }
        return false;
    }

+
+    /**
+     * Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case.
+     * @param words - one or more Strings (varargs) which the TextBlock might contain as a word
+     * @return true, if at least one of the provided Strings equals a word of this node's TextBlock, ignoring case
+     */
    default boolean containsAnyWordIgnoreCase(String... words) {

        // Case-fold both the candidates and the text block words with a fixed locale so the result
        // does not depend on the JVM default locale.
        return Arrays.stream(words)
                .map(word -> word.toLowerCase(Locale.ENGLISH))
                .anyMatch(needle -> getTextBlock().getWords().stream().map(s -> s.toLowerCase(Locale.ENGLISH)).anyMatch(needle::equals));
    }

+
+    /**
+     * Checks whether this SemanticNode contains all the provided Strings as words.
+     * @param words - one or more Strings (varargs) which the TextBlock might contain as words
+     * @return true, if every provided String equals a word of this node's TextBlock
+     */
    default boolean containsAllWords(String... words) {

        // Fail fast on the first required word that has no exact (case-sensitive) match.
        for (String required : words) {
            if (getTextBlock().getWords().stream().noneMatch(required::equals)) {
                return false;
            }
        }
        return true;
    }

+
+    /**
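+     * Checks whether this SemanticNode contains all the provided Strings as words, ignoring case.
+     * NOTE(review): the return statement below iterates getTextBlock().getWords() only and never reads {@code words} - confirm and fix.
+     * @param words - one or more Strings (varargs) which the TextBlock might contain as words
+     * @return true, if every provided String equals a word of this node's TextBlock, ignoring case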
+     */
    default boolean containsAllWordsIgnoreCase(String... words) {

        return getTextBlock().getWords().stream().map(String::toLowerCase).allMatch(word -> getTextBlock().getWords().stream().anyMatch(s -> word.toLowerCase(Locale.ENGLISH).equals(s)));
--- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java
@ -21,6 +21,7 @@ import org.springframework.context.annotation.Import;
 import org.springframework.context.annotation.Primary;
 import org.springframework.test.context.junit.jupiter.SpringExtension;

+import com.iqser.red.commons.jackson.ObjectMapperFactory;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
@ -31,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
 import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
 import com.iqser.red.storage.commons.StorageAutoConfiguration;
 import com.iqser.red.storage.commons.service.StorageService;
+import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
 import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
 import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
 import com.knecon.fforesight.tenantcommons.TenantContext;
@ -79,7 +81,7 @@ public class AnalysisTest extends AbstractRedactionIntegrationTest {
        @Primary
        public StorageService inmemoryStorage() {

-            return new FileSystemBackedStorageService();
+            return new FileSystemBackedStorageService(ObjectMapperFactory.create());
        }

    }
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl
@ -54,8 +54,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus
-import java.util.function.Function
-import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;

 global Document document
 global EntityCreationService entityCreationService
@ -90,15 +88,15 @@ rule "TAB.0.1: Guidelines"
        $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS"))
    then
        entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
-            .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline no. found", "n-a"));
+            .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no. found"));
        entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
-            .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
+            .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
        entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section)
-            .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
+            .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
        entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
-            .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline number found", "n-a"));
+            .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline number found"));
        entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
-            .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
+            .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
    end

 rule "TAB.1.0: Full Table extraction (Guideline Deviation)"
@ -134,36 +132,35 @@ rule "TAB.3.0: Individual column extraction (Strain)"
            .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dosages", EntityType.ENTITY))
            .filter(Optional::isPresent)
            .map(Optional::get)
-            .forEach(redactionEntity -> redactionEntity.redact("TAB.3.0", "Individual column based on column header", "n-a"));
+            .forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header"));
    end

-rule "TAB.4.0: Dose Mortality"
+rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage"
    when
        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
        $section: Section(getHeadline().containsString("Combined Columns"))
        $table: Table(hasHeader("Sex"), hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
-        TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList()
-        $tableCell: TableCell($row == row,  containsStringIgnoreCase("Dosage")) from $table.streamTableCells().toList()
-//        $male_dosage: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList())
+        $maleCells: TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList()
+        $dosageCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList()
    then
-        System.out.println("BBBB: " + $tableCell);
-//        $table.streamTableCellsWithHeader("Mortality")
-//            .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY))
-//            .filter(Optional::isPresent)
-//            .map(Optional::get)
-//            .forEach(redactionEntity -> redactionEntity.redact("TAB.0.5", "Dose Mortality found", "n-a"));
-//        $table.streamTableCellsWithHeader("Dosage (mg/kg bw)")
-//            .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY))
-//            .filter(Optional::isPresent)
-//            .map(Optional::get)
-//            .forEach(redactionEntity -> redactionEntity.redact("TAB.0.5", "Dose Mortality dose found", "n-a"));
+        entityCreationService.bySemanticNode($maleCells, "combined_male_dosage", EntityType.ENTITY)
+                    .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
+        entityCreationService.bySemanticNode($dosageCells, "combined_male_dosage", EntityType.ENTITY)
+                    .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
    end

-rule "TAB.4.1: sdsdf"
+rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality"
    when
-
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
+        $section: Section(getHeadline().containsString("Combined Columns"))
+        $table: Table(hasHeader("Sex"), hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
+        $femaleCells: TableCell($row: row, containsAnyWordIgnoreCase("Female")) from $table.streamTableCellsWithHeader("Sex").toList()
+        $mortalityCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Mortality").toList()
    then
-
+        entityCreationService.bySemanticNode($femaleCells, "combined_female_mortality", EntityType.ENTITY)
+            .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
+        entityCreationService.bySemanticNode($mortalityCells, "combined_female_mortality", EntityType.ENTITY)
+            .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
    end

 rule "TAB.5.0: Targeted cell extraction"
@ -187,7 +184,8 @@ rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)"
        TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList()
        $femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList()
    then
-        entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY).ifPresent(entity -> entity.redact("TAB.6.0", "Female in group to experimental start date", "n-a"));
+        entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY)
+            .ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date"));
    end

 rule "TAB.7.0: Indicator (Species)"
@ -200,7 +198,7 @@ rule "TAB.7.0: Indicator (Species)"
        $cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList()
    then
        entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY)
-            .ifPresent(redactionEntity -> redactionEntity.redact("TAB.7.0", "Vertebrate study found", "n-a"));
+            .ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found"));
    end

 //------------------------------------ Manual redaction rules ------------------------------------