From 1b6d8d31c4b117ae73af5db71222f87ed7cf55f4 Mon Sep 17 00:00:00 2001 From: Ali Oezyetimoglu Date: Wed, 25 Oct 2023 09:12:10 +0200 Subject: [PATCH] RED-7679: WIP: Added Table Demo rules according to given examples in pdf --- .../model/document/nodes/SemanticNode.java | 36 +++++++++++++ .../redaction/v1/server/AnalysisTest.java | 4 +- .../src/test/resources/drools/table_demo.drl | 54 +++++++++---------- 3 files changed, 65 insertions(+), 29 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java index 0b065ecf..ce93415f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java @@ -424,31 +424,67 @@ public interface SemanticNode { return Arrays.stream(strings).allMatch(this::containsStringIgnoreCase); } + + /** + * Checks whether this SemanticNode contains exactly the provided String as a word. + * @param word - String which the TextBlock might contain + * @return true, if this node's TextBlock contains string + */ default boolean containsWord(String word) { return getTextBlock().getWords().stream().anyMatch(s -> s.equals(word)); } + + /** + * Checks whether this SemanticNode contains exactly the provided String as a word ignoring case. 
+ * @param word - String which the TextBlock might contain + * @return true, if this node's TextBlock contains the string as a word, ignoring case + */ default boolean containsWordIgnoreCase(String word) { return getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH))); } + + /** + * Checks whether this SemanticNode contains any of the provided Strings as a word. + * @param words - Strings (varargs) which the TextBlock might contain as words + * @return true, if this node's TextBlock contains any of the provided strings + */ default boolean containsAnyWord(String... words) { return Arrays.stream(words).anyMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals)); } + + /** + * Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case. + * @param words - Strings (varargs) which the TextBlock might contain as words + * @return true, if this node's TextBlock contains any of the provided strings, ignoring case + */ default boolean containsAnyWordIgnoreCase(String... words) { return Arrays.stream(words).map(String::toLowerCase).anyMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals)); } + + /** + * Checks whether this SemanticNode contains all the provided Strings as words. + * @param words - Strings (varargs) which the TextBlock might contain as words + * @return true, if this node's TextBlock contains all the provided strings + */ default boolean containsAllWords(String... words) { return Arrays.stream(words).allMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals)); } + + /** + * Checks whether this SemanticNode contains all the provided Strings as words, ignoring case. + * @param words - Strings (varargs) which the TextBlock might contain as words + * @return true, if this node's TextBlock contains all the provided strings, ignoring case + */ default boolean containsAllWordsIgnoreCase(String... 
words) { - return getTextBlock().getWords().stream().map(String::toLowerCase).allMatch(word -> getTextBlock().getWords().stream().anyMatch(s -> word.toLowerCase(Locale.ENGLISH).equals(s))); + return Arrays.stream(words).map(w -> w.toLowerCase(Locale.ENGLISH)).allMatch(word -> getTextBlock().getWords().stream().map(s -> s.toLowerCase(Locale.ENGLISH)).anyMatch(word::equals)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java index 9965d360..24d0a52b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisTest.java @@ -21,6 +21,7 @@ import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Primary; import org.springframework.test.context.junit.jupiter.SpringExtension; +import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; @@ -31,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse; import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils; import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.service.StorageService; +import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration; import com.knecon.fforesight.tenantcommons.TenantContext; @@ -79,7 +81,7 @@ public class AnalysisTest extends AbstractRedactionIntegrationTest { @Primary public StorageService 
inmemoryStorage() { - return new FileSystemBackedStorageService(); + return new FileSystemBackedStorageService(ObjectMapperFactory.create()); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl index 31657538..69015695 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/table_demo.drl @@ -54,8 +54,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus -import java.util.function.Function -import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation; global Document document global EntityCreationService entityCreationService @@ -90,15 +88,15 @@ rule "TAB.0.1: Guidelines" $section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS")) then entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section) - .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline no. found", "n-a")); + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no. 
found")); entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section) - .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a")); + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section) - .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a")); + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY, 1, $section) - .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline number found", "n-a")); + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline number found")); entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY, 2, $section) - .forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a")); + .forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found")); end rule "TAB.1.0: Full Table extraction (Guideline Deviation)" @@ -134,36 +132,35 @@ rule "TAB.3.0: Individual column extraction (Strain)" .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dosages", EntityType.ENTITY)) .filter(Optional::isPresent) .map(Optional::get) - .forEach(redactionEntity -> redactionEntity.redact("TAB.3.0", "Individual column based on column header", "n-a")); + .forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header")); end -rule "TAB.4.0: Dose Mortality" +rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage" when FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("425")) $section: Section(getHeadline().containsString("Combined Columns")) $table: Table(hasHeader("Sex"), hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() - TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList() - $tableCell: TableCell($row == row, containsStringIgnoreCase("Dosage")) from $table.streamTableCells().toList() -// $male_dosage: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList()) + $maleCells: TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList() + $dosageCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList() then - System.out.println("BBBB: " + $tableCell); -// $table.streamTableCellsWithHeader("Mortality") -// .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) -// .filter(Optional::isPresent) -// .map(Optional::get) -// .forEach(redactionEntity -> redactionEntity.redact("TAB.0.5", "Dose Mortality found", "n-a")); -// $table.streamTableCellsWithHeader("Dosage (mg/kg bw)") -// .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY)) -// .filter(Optional::isPresent) -// .map(Optional::get) -// .forEach(redactionEntity -> redactionEntity.redact("TAB.0.5", "Dose Mortality dose found", "n-a")); + entityCreationService.bySemanticNode($maleCells, "combined_male_dosage", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male")); + entityCreationService.bySemanticNode($dosageCells, "combined_male_dosage", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male")); end -rule "TAB.4.1: sdsdf" +rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality" when - + FileAttribute(label == "OECD Number", 
valueEqualsAnyOf("425")) + $section: Section(getHeadline().containsString("Combined Columns")) + $table: Table(hasHeader("Sex"), hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList() + $femaleCells: TableCell($row: row, containsAnyWordIgnoreCase("Female")) from $table.streamTableCellsWithHeader("Sex").toList() + $mortalityCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Mortality").toList() then - + entityCreationService.bySemanticNode($femaleCells, "combined_female_mortality", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female")); + entityCreationService.bySemanticNode($mortalityCells, "combined_female_mortality", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female")); end rule "TAB.5.0: Targeted cell extraction" @@ -187,7 +184,8 @@ rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)" TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList() $femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList() then - entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY).ifPresent(entity -> entity.redact("TAB.6.0", "Female in group to experimental start date", "n-a")); + entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY) + .ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date")); end rule "TAB.7.0: Indicator (Species)" @@ -200,7 +198,7 @@ rule "TAB.7.0: Indicator (Species)" $cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList() then entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY) - .ifPresent(redactionEntity -> redactionEntity.redact("TAB.7.0", "Vertebrate study found", "n-a")); + 
.ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found")); end //------------------------------------ Manual redaction rules ------------------------------------