RED-7679: WIP: Added Table Demo rules according to given examples in pdf
This commit is contained in:
parent
0c7d39ff0c
commit
1b6d8d31c4
@ -424,31 +424,67 @@ public interface SemanticNode {
|
||||
return Arrays.stream(strings).allMatch(this::containsStringIgnoreCase);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains exactly the provided String as a word.
|
||||
* @param word - String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains string
|
||||
*/
|
||||
default boolean containsWord(String word) {
|
||||
|
||||
return getTextBlock().getWords().stream().anyMatch(s -> s.equals(word));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains exactly the provided String as a word ignoring case.
|
||||
* @param word - String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains string
|
||||
*/
|
||||
default boolean containsWordIgnoreCase(String word) {
|
||||
|
||||
return getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(s -> s.equals(word.toLowerCase(Locale.ENGLISH)));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings as a word.
|
||||
* @param words - A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains any of the provided strings
|
||||
*/
|
||||
default boolean containsAnyWord(String... words) {
|
||||
|
||||
return Arrays.stream(words).anyMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case.
|
||||
* @param words - A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains any of the provided strings
|
||||
*/
|
||||
default boolean containsAnyWordIgnoreCase(String... words) {
|
||||
|
||||
return Arrays.stream(words).map(String::toLowerCase).anyMatch(word -> getTextBlock().getWords().stream().map(String::toLowerCase).anyMatch(word::equals));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings as word.
|
||||
* @param words - A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains all the provided strings
|
||||
*/
|
||||
default boolean containsAllWords(String... words) {
|
||||
|
||||
return Arrays.stream(words).allMatch(word -> getTextBlock().getWords().stream().anyMatch(word::equals));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings as word ignoring case.
|
||||
* @param words - A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains all the provided strings
|
||||
*/
|
||||
default boolean containsAllWordsIgnoreCase(String... words) {
|
||||
|
||||
return getTextBlock().getWords().stream().map(String::toLowerCase).allMatch(word -> getTextBlock().getWords().stream().anyMatch(s -> word.toLowerCase(Locale.ENGLISH).equals(s)));
|
||||
|
||||
@ -21,6 +21,7 @@ import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||
@ -31,6 +32,7 @@ import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingServiceProcessorConfiguration;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
@ -79,7 +81,7 @@ public class AnalysisTest extends AbstractRedactionIntegrationTest {
|
||||
@Primary
|
||||
public StorageService inmemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
return new FileSystemBackedStorageService(ObjectMapperFactory.create());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -54,8 +54,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualLegalBasisChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus
|
||||
import java.util.function.Function
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
|
||||
|
||||
global Document document
|
||||
global EntityCreationService entityCreationService
|
||||
@ -90,15 +88,15 @@ rule "TAB.0.1: Guidelines"
|
||||
$section: Section(containsAnyString("DATA REQUIREMENT", "TEST GUIDELINE", "MÉTODO(S) DE REFERÊNCIA(S):") && containsAnyString("OECD", "EPA", "OPPTS"))
|
||||
then
|
||||
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
|
||||
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline no. found", "n-a"));
|
||||
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline no. found"));
|
||||
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
|
||||
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
|
||||
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
|
||||
entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY, 1, $section)
|
||||
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
|
||||
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
|
||||
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY, 1, $section)
|
||||
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline number found", "n-a"));
|
||||
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline number found"));
|
||||
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY, 2, $section)
|
||||
.forEach(guideline -> guideline.redact("TAB.0.1", "OECD Guideline year found", "n-a"));
|
||||
.forEach(guideline -> guideline.apply("TAB.0.1", "OECD Guideline year found"));
|
||||
end
|
||||
|
||||
rule "TAB.1.0: Full Table extraction (Guideline Deviation)"
|
||||
@ -134,36 +132,35 @@ rule "TAB.3.0: Individual column extraction (Strain)"
|
||||
.map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dosages", EntityType.ENTITY))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.forEach(redactionEntity -> redactionEntity.redact("TAB.3.0", "Individual column based on column header", "n-a"));
|
||||
.forEach(redactionEntity -> redactionEntity.apply("TAB.3.0", "Individual column based on column header"));
|
||||
end
|
||||
|
||||
// Combined column extraction: for files whose "OECD Number" attribute equals 425, looks at tables
// with the headers "Sex" and "Dosage (mg/kg bw)" below the parent of a section whose headline
// contains "Combined Columns". For every row whose "Sex" cell contains the word "Male"
// (ignoring case), both that cell and the dosage cell of the same row become entities.
rule "TAB.4.0: Combined Columns Extraction - Sex and Dosage"
    when
        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
        $section: Section(getHeadline().containsString("Combined Columns"))
        $table: Table(hasHeader("Sex"), hasHeader("Dosage (mg/kg bw)")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
        // $row is bound exactly once here and reused below to select the cell in the same row.
        $maleCells: TableCell($row: row, containsAnyWordIgnoreCase("Male")) from $table.streamTableCellsWithHeader("Sex").toList()
        // NOTE(review): the table constraint uses the header "Dosage (mg/kg bw)" while this lookup uses
        // "Dosage" - confirm that streamTableCellsWithHeader matches partial header text.
        $dosageCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Dosage").toList()
    then
        entityCreationService.bySemanticNode($maleCells, "combined_male_dosage", EntityType.ENTITY)
            .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
        entityCreationService.bySemanticNode($dosageCells, "combined_male_dosage", EntityType.ENTITY)
            .ifPresent(entity -> entity.apply("TAB.4.0", "Dosage combined in row with male"));
    end
|
||||
|
||||
// Combined column extraction: for files whose "OECD Number" attribute equals 425, looks at tables
// with the headers "Sex" and "Mortality" below the parent of a section whose headline contains
// "Combined Columns". For every row whose "Sex" cell contains the word "Female" (ignoring case),
// both that cell and the "Mortality" cell of the same row become entities.
rule "TAB.4.1: Combined Columns Extraction - Sex and Mortality"
    when
        FileAttribute(label == "OECD Number", valueEqualsAnyOf("425"))
        $section: Section(getHeadline().containsString("Combined Columns"))
        $table: Table(hasHeader("Sex"), hasHeader("Mortality")) from $section.getParent().streamAllSubNodesOfType(NodeType.TABLE).toList()
        // $row is bound on the "Sex" cell and reused to pick the "Mortality" cell of the same row.
        $femaleCells: TableCell($row: row, containsAnyWordIgnoreCase("Female")) from $table.streamTableCellsWithHeader("Sex").toList()
        $mortalityCells: TableCell($row == row) from $table.streamTableCellsWithHeader("Mortality").toList()
    then
        entityCreationService.bySemanticNode($femaleCells, "combined_female_mortality", EntityType.ENTITY)
            .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
        entityCreationService.bySemanticNode($mortalityCells, "combined_female_mortality", EntityType.ENTITY)
            .ifPresent(entity -> entity.apply("TAB.4.1", "Mortality combined in row with female"));
    end
|
||||
|
||||
rule "TAB.5.0: Targeted cell extraction"
|
||||
@ -187,7 +184,8 @@ rule "TAB.6.0: Targeted cell extraction (Experimental Stop date)"
|
||||
TableCell($row == row, containsStringIgnoreCase("Survived")) from $table.streamTableCellsWithHeader("Group 2").toList()
|
||||
$femaleSurvived: TableCell($row == row) from $table.streamTableCellsWithHeader("Group 2").toList()
|
||||
then
|
||||
entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY).ifPresent(entity -> entity.redact("TAB.6.0", "Female in group to experimental start date", "n-a"));
|
||||
entityCreationService.bySemanticNode($femaleSurvived, "experiment_female_survived", EntityType.ENTITY)
|
||||
.ifPresent(entity -> entity.apply("TAB.6.0", "Female in group to experimental start date"));
|
||||
end
|
||||
|
||||
rule "TAB.7.0: Indicator (Species)"
|
||||
@ -200,7 +198,7 @@ rule "TAB.7.0: Indicator (Species)"
|
||||
$cell: TableCell($col == col, $row == row) from $table.streamTableCells().toList()
|
||||
then
|
||||
entityCreationService.bySemanticNode($cell, "study_design", EntityType.ENTITY)
|
||||
.ifPresent(redactionEntity -> redactionEntity.redact("TAB.7.0", "Vertebrate study found", "n-a"));
|
||||
.ifPresent(redactionEntity -> redactionEntity.apply("TAB.7.0", "Vertebrate study found"));
|
||||
end
|
||||
|
||||
//------------------------------------ Manual redaction rules ------------------------------------
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user