RED-7679: WIP: Added Table Demo rules according to the examples given in the PDF

This commit is contained in:
Ali Oezyetimoglu 2023-10-25 09:12:10 +02:00
parent 0c7d39ff0c
commit 1b6d8d31c4
3 changed files with 65 additions and 29 deletions

View File

@ -424,31 +424,67 @@ public interface SemanticNode {
return Arrays.stream(strings).allMatch(this::containsStringIgnoreCase);
}
/**
 * Tests whether the given String occurs as a complete word in this SemanticNode's TextBlock.
 * The comparison is case-sensitive.
 *
 * @param word - the String to look for among the words of the TextBlock
 * @return true, if at least one word of this node's TextBlock equals the given String
 */
default boolean containsWord(String word) {

    return getTextBlock()
            .getWords()
            .stream()
            .anyMatch(candidate -> candidate.equals(word));
}
/**
 * Checks whether this SemanticNode contains exactly the provided String as a word ignoring case.
 * Both the provided String and the words of the TextBlock are lower-cased with {@link Locale#ENGLISH},
 * so the outcome does not depend on the default locale of the JVM.
 *
 * @param word - String which the TextBlock might contain as a word, must not be null
 * @return true, if any word of this node's TextBlock equals the provided String ignoring case
 */
default boolean containsWordIgnoreCase(String word) {

    // Lower-case the search term once instead of once per compared word.
    String lowerCaseWord = word.toLowerCase(Locale.ENGLISH);
    return getTextBlock().getWords()
            .stream()
            // Same locale as the search term; the default-locale variant can produce different characters (e.g. Turkish dotless i).
            .map(textBlockWord -> textBlockWord.toLowerCase(Locale.ENGLISH))
            .anyMatch(lowerCaseWord::equals);
}
/**
 * Tests whether at least one of the given Strings occurs as a complete word in this SemanticNode's TextBlock.
 * The comparison is case-sensitive.
 *
 * @param words - the Strings to look for among the words of the TextBlock
 * @return true, as soon as one of the given Strings equals a word of this node's TextBlock, false otherwise
 */
default boolean containsAnyWord(String... words) {

    for (String candidate : words) {
        boolean found = getTextBlock().getWords().stream().anyMatch(candidate::equals);
        if (found) {
            return true;
        }
    }
    return false;
}
/**
 * Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case.
 * Both the provided Strings and the words of the TextBlock are lower-cased with {@link Locale#ENGLISH},
 * so the outcome does not depend on the default locale of the JVM and is consistent with
 * {@code containsWordIgnoreCase}.
 *
 * @param words - Strings which the TextBlock might contain as words
 * @return true, if this node's TextBlock contains any of the provided Strings as a word ignoring case
 */
default boolean containsAnyWordIgnoreCase(String... words) {

    return Arrays.stream(words)
            .map(word -> word.toLowerCase(Locale.ENGLISH))
            .anyMatch(word -> getTextBlock().getWords()
                    .stream()
                    .map(textBlockWord -> textBlockWord.toLowerCase(Locale.ENGLISH))
                    .anyMatch(word::equals));
}
/**
 * Tests whether every one of the given Strings occurs as a complete word in this SemanticNode's TextBlock.
 * The comparison is case-sensitive.
 *
 * @param words - the Strings to look for among the words of the TextBlock
 * @return false, as soon as one of the given Strings equals no word of this node's TextBlock, true otherwise
 */
default boolean containsAllWords(String... words) {

    for (String required : words) {
        boolean found = getTextBlock().getWords().stream().anyMatch(required::equals);
        if (!found) {
            return false;
        }
    }
    return true;
}
/**
 * Checks whether this SemanticNode contains all the provided Strings as words ignoring case.
 * Both the provided Strings and the words of the TextBlock are lower-cased with {@link Locale#ENGLISH},
 * so the outcome does not depend on the default locale of the JVM.
 *
 * @param words - Strings which the TextBlock might contain as words
 * @return true, if every provided String equals some word of this node's TextBlock ignoring case
 */
default boolean containsAllWordsIgnoreCase(String... words) {

    // Iterate the provided words (previously the TextBlock's own words were streamed and the argument was ignored).
    return Arrays.stream(words)
            .map(word -> word.toLowerCase(Locale.ENGLISH))
            .allMatch(word -> getTextBlock().getWords()
                    .stream()
                    .map(textBlockWord -> textBlockWord.toLowerCase(Locale.ENGLISH))
                    .anyMatch(word::equals));

View File

@ -21,6 +21,7 @@ import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
@ -31,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -79,7 +81,7 @@ public class AnalysisTest extends AbstractRedactionIntegrationTest {
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
return new FileSystemBackedStorageService(ObjectMapperFactory.create());
}
}

View File

@ -54,8 +54,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus
import java.util.function.Function
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
global Document document
global EntityCreationService entityCreationService
@ -90,15 +88,15 @@ rule "TAB.0.1: Guidelines"
$section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS"))
then
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline no. found", "n-a"));
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no. found"));
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section)
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline number found", "n-a"));
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline number found"));
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
end
rule "TAB.1.0: Full Table extraction (Guideline Deviation)"
@ -134,36 +132,35 @@ rule "TAB.3.0: Individual column extraction (Strain)"
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dosages", EntityType.ENTITY))
.filter(Optional::isPresent)
.map(Optional::get)
.forEach(redactionEntity -> redactionEntity.redact("TAB.3.0", "Individual column based on column header", "n-a"));
.forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header"));
end
rule "TAB.4.0: Dose Mortality"
rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Combined Columns"))
$table: Table(hasHeader("Sex"), hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList()
$tableCell: TableCell($row == row, containsStringIgnoreCase("Dosage")) from $table.streamTableCells().toList()
// $male_dosage: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList())
$maleCells: TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList()
$dosageCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList()
then
System.out.println("BBBB: " + $tableCell);
// $table.streamTableCellsWithHeader("Mortality")
// .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY))
// .filter(Optional::isPresent)
// .map(Optional::get)
// .forEach(redactionEntity -> redactionEntity.redact("TAB.0.5", "Dose Mortality found", "n-a"));
// $table.streamTableCellsWithHeader("Dosage (mg/kg bw)")
// .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality_dose", EntityType.ENTITY))
// .filter(Optional::isPresent)
// .map(Optional::get)
// .forEach(redactionEntity -> redactionEntity.redact("TAB.0.5", "Dose Mortality dose found", "n-a"));
entityCreationService.bySemanticNode($maleCells, "combined_male_dosage", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
entityCreationService.bySemanticNode($dosageCells, "combined_male_dosage", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
end
rule "TAB.4.1: sdsdf"
rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality"
when
FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
$section: Section(getHeadline().containsString("Combined Columns"))
$table: Table(hasHeader("Sex"), hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
$femaleCells: TableCell($row: row, containsAnyWordIgnoreCase("Female")) from $table.streamTableCellsWithHeader("Sex").toList()
$mortalityCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Mortality").toList()
then
entityCreationService.bySemanticNode($femaleCells, "combined_female_mortality", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
entityCreationService.bySemanticNode($mortalityCells, "combined_female_mortality", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
end
rule "TAB.5.0: Targeted cell extraction"
@ -187,7 +184,8 @@ rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)"
TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList()
$femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList()
then
entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY).ifPresent(entity -> entity.redact("TAB.6.0", "Female in group to experimental start date", "n-a"));
entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY)
.ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date"));
end
rule "TAB.7.0: Indicator (Species)"
@ -200,7 +198,7 @@ rule "TAB.7.0: Indicator (Species)"
$cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList()
then
entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY)
.ifPresent(redactionEntity -> redactionEntity.redact("TAB.7.0", "Vertebrate study found", "n-a"));
.ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found"));
end
//------------------------------------ Manual redaction rules ------------------------------------