RED-9859: Redactions found by et al. rule not skipped with published information

This commit is contained in:
Maverick Studer 2024-09-10 14:24:09 +02:00
parent 3c165070ee
commit 07b9b8bf8a
9 changed files with 358 additions and 323 deletions

View File

@ -338,7 +338,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID); var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY)) var desireeEtAl = findEntityByTypeAndValue(entityLog, "CBI_author", "Desiree").filter(e -> e.getEntryType().equals(EntryType.ENTITY))
.filter(e -> e.getMatchedRule().startsWith("CBI.16")) .filter(e -> e.getMatchedRule().startsWith("CBI.7"))
.findAny() .findAny()
.orElseThrow(); .orElseThrow();
IdRemoval removal = buildIdRemoval(desireeEtAl.getId()); IdRemoval removal = buildIdRemoval(desireeEtAl.getId());

View File

@ -131,56 +131,54 @@ rule "CBI.2.0: Do not redact genitive CBI Author"
// Rule unit: CBI.7 // Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$section: Section(!hasTables(), $section: Section(containsString("et al."))
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(redactionEntity -> { .forEach(entity -> {
redactionEntity.skipWithReferences( entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
"CBI.7.0", dictionary.recommendEverywhere(entity);
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() $section: Section(containsString("et al."))
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
end .forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
rule "CBI.7.2: Do not redact PII if published information found in Section without tables" dictionary.recommendEverywhere(entity);
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.7.3: Do not redact PII if published information found in same table row" rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() $section: Section(containsString("et al."))
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end end
@ -291,54 +289,56 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study
// Rule unit: CBI.16 // Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$section: Section(containsString("et al.")) $section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> { .forEach(redactionEntity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); redactionEntity.skipWithReferences(
dictionary.recommendEverywhere(entity); "CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$section: Section(containsString("et al.")) $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
.forEach(entity -> { end
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity); rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" rule "CBI.16.3: Do not redact PII if published information found in same table row"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$section: Section(containsString("et al.")) $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end end

View File

@ -314,56 +314,54 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe
// Rule unit: CBI.7 // Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$section: Section(!hasTables(), $section: Section(containsString("et al."))
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(redactionEntity -> { .forEach(entity -> {
redactionEntity.skipWithReferences( entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
"CBI.7.0", dictionary.recommendEverywhere(entity);
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() $section: Section(containsString("et al."))
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
end .forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
rule "CBI.7.2: Do not redact PII if published information found in Section without tables" dictionary.recommendEverywhere(entity);
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.7.3: Do not redact PII if published information found in same table row" rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() $section: Section(containsString("et al."))
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end end
@ -656,54 +654,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
// Rule unit: CBI.16 // Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$section: Section(containsString("et al.")) $section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> { .forEach(redactionEntity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); redactionEntity.skipWithReferences(
dictionary.recommendEverywhere(entity); "CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$section: Section(containsString("et al.")) $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
.forEach(entity -> { end
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity); rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" rule "CBI.16.3: Do not redact PII if published information found in same table row"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$section: Section(containsString("et al.")) $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end end

View File

@ -131,30 +131,30 @@ rule "CBI.2.0: Do not redact genitive CBI Author"
// Rule unit: CBI.7 // Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$section: Section(!hasTables(), not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
hasEntitiesOfType("published_information"), $section: Section(containsString("et al."))
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(redactionEntity -> { .forEach(entity -> {
redactionEntity.skipWithReferences( entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
"CBI.7.0", dictionary.recommendEverywhere(entity);
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() $section: Section(containsString("et al."))
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end end
@ -226,30 +226,30 @@ rule "CBI.11.0: Recommend all CBI_author entities in Table with Vertebrate Study
// Rule unit: CBI.16 // Rule unit: CBI.16
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $section: Section(!hasTables(),
$section: Section(containsString("et al.")) hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> { .forEach(redactionEntity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002"); redactionEntity.skipWithReferences(
dictionary.recommendEverywhere(entity); "CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$section: Section(containsString("et al.")) $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end end

View File

@ -216,6 +216,58 @@ rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also red
end end
// Rule unit: CBI.7
// CBI.7.0 (agenda-group LOCAL_DICTIONARY_ADDS): creates CBI_author entities from "<Name> et al" citations.
// Matches each Section satisfying containsString("et al.") and runs the regex on that section via byRegex.
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted with rule id "CBI.7.0" and then passed to dictionary.recommendEverywhere.
// NOTE(review): the argument `1` presumably selects capture group 1 as the entity text — confirm.
// NOTE(review): the when-clause asks for "et al." with a dot while the regex also accepts "et al" without
// one; confirm that sections containing only the dot-less form are meant to be skipped.
rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
// CBI.7.1 (agenda-group LOCAL_DICTIONARY_ADDS): "et al" author detection for the non-vertebrate case.
// Fires only when NO FileAttribute labelled "Vertebrate Study" has a value that soundslike "Yes" or whose
// lower-cased value equals "y", and only for Sections satisfying containsString("et al.").
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted with rule id "CBI.7.1" and then passed to dictionary.recommendEverywhere.
// NOTE(review): the argument `1` presumably selects capture group 1 as the entity text — confirm.
rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
// CBI.7.2 (agenda-group LOCAL_DICTIONARY_ADDS): "et al" author detection for the vertebrate case.
// Requires a FileAttribute labelled "Vertebrate Study" whose value soundslike "Yes" or whose lower-cased
// value equals "y", combined with a Section satisfying containsString("et al.").
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted with rule id "CBI.7.2" and then passed to dictionary.recommendEverywhere.
// NOTE(review): the argument `1` presumably selects capture group 1 as the entity text — confirm.
rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
// CBI.7.3 (agenda-group LOCAL_DICTIONARY_ADDS): "et al" author detection, redacting with rule id "CBI.7.3".
// Matches each Section satisfying containsString("et al.") and runs the regex on that section via byRegex.
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted and then passed to dictionary.recommendEverywhere.
// NOTE(review): the when-clause is identical to rule CBI.7.0; only the rule id and the third redact
// argument differ — presumably only one of the two is meant to be active in a given rule set; confirm.
// NOTE(review): the argument `1` presumably selects capture group 1 as the entity text — confirm.
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
// Rule unit: CBI.8 // Rule unit: CBI.8
rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.0: Redacted because Section contains must_redact entity"
when when
@ -426,58 +478,6 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
end end
// Rule unit: CBI.16
// CBI.16.0 (agenda-group LOCAL_DICTIONARY_ADDS): creates CBI_author entities from "<Name> et al" citations.
// Matches each Section satisfying containsString("et al.") and runs the regex on that section via byRegex.
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted with rule id "CBI.16.0" and then passed to dictionary.recommendEverywhere.
// NOTE(review): the argument `1` presumably selects capture group 1 as the entity text — confirm.
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
dictionary.recommendEverywhere(entity);
});
end
// CBI.16.1 (agenda-group LOCAL_DICTIONARY_ADDS): "et al" author detection for the non-vertebrate case.
// Fires only when NO FileAttribute labelled "Vertebrate Study" has a value that soundslike "Yes" or whose
// lower-cased value equals "y", and only for Sections satisfying containsString("et al.").
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted with rule id "CBI.16.1" and then passed to dictionary.recommendEverywhere.
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
// CBI.16.2 (agenda-group LOCAL_DICTIONARY_ADDS): "et al" author detection for the vertebrate case.
// Requires a FileAttribute labelled "Vertebrate Study" whose value soundslike "Yes" or whose lower-cased
// value equals "y", combined with a Section satisfying containsString("et al.").
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted with rule id "CBI.16.2" and then passed to dictionary.recommendEverywhere.
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
// CBI.16.3 (agenda-group LOCAL_DICTIONARY_ADDS): "et al" author detection, redacting with rule id "CBI.16.3".
// Matches each Section satisfying containsString("et al.") and runs the regex on that section via byRegex.
// Regex capture group 1 = a word starting with A-Z/Ä/Ö/Ü followed by one or more characters other than
// whitespace, '.' or ',', plus up to two optional initial groups; " et al" (trailing dot optional) must
// follow but lies outside the group.
// Every returned entity is redacted and then passed to dictionary.recommendEverywhere.
// NOTE(review): the when-clause is identical to rule CBI.16.0; only the rule id and the third redact
// argument differ — presumably only one of the two is meant to be active in a given rule set; confirm.
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
// Rule unit: CBI.17 // Rule unit: CBI.17
rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon" rule "CBI.17.0: Add recommendation for Addresses in Test Organism sections, without colon"
when when
@ -1223,7 +1223,6 @@ rule "AI.5.0: Combine and add NER Entities as CBI_address"
end end
// Rule unit: AI.6 // Rule unit: AI.6
rule "AI.6.0: Add all NER Entities of type Location" rule "AI.6.0: Add all NER Entities of type Location"
salience 999 salience 999
@ -1238,7 +1237,6 @@ rule "AI.6.0: Add all NER Entities of type Location"
end end
// Rule unit: AI.7 // Rule unit: AI.7
rule "AI.7.0: Add all NER Entities of type Address" rule "AI.7.0: Add all NER Entities of type Address"
salience 999 salience 999
@ -1252,6 +1250,7 @@ rule "AI.7.0: Add all NER Entities of type Address"
.ifPresent(e -> e.skip("AI.7.0", ""))); .ifPresent(e -> e.skip("AI.7.0", "")));
end end
//------------------------------------ Manual changes rules ------------------------------------ //------------------------------------ Manual changes rules ------------------------------------
// Rule unit: MAN.0 // Rule unit: MAN.0

View File

@ -4,11 +4,13 @@ import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.List; import java.util.List;
import java.util.Map;
import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory; import com.knecon.fforesight.utility.rules.management.factory.RuleFileFactory;
import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser; import com.knecon.fforesight.utility.rules.management.factory.RuleFileParser;
import com.knecon.fforesight.utility.rules.management.models.BasicRule; import com.knecon.fforesight.utility.rules.management.models.BasicRule;
import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint; import com.knecon.fforesight.utility.rules.management.models.RuleFileBluePrint;
import com.knecon.fforesight.utility.rules.management.models.RuleIdentifier;
import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO; import com.knecon.fforesight.utility.rules.management.utils.RuleFileIO;
import lombok.SneakyThrows; import lombok.SneakyThrows;
@ -21,17 +23,15 @@ import lombok.experimental.UtilityClass;
@UtilityClass @UtilityClass
public class RuleFileMigrator { public class RuleFileMigrator {
@SneakyThrows @SneakyThrows
public void migrateFile(File ruleFile) { public void migrateFile(File ruleFile) {
RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath())); RuleFileBluePrint ruleFileBluePrint = RuleFileParser.buildBluePrintFromRulesString(RuleFileIO.getRulesString(ruleFile.getAbsolutePath()));
RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles(); RuleFileBluePrint combinedBluePrint = RuleFileParser.buildBluePrintFromAllRuleFiles();
for (BasicRule ruleToReplace : ruleFileBluePrint.getAllRules()) { //replaceRules(ruleFileBluePrint, combinedBluePrint);
List<BasicRule> rulesToAdd = combinedBluePrint.findRuleByIdentifier(ruleToReplace.identifier()); replaceRuleIdentifiers(combinedBluePrint, ruleFileBluePrint);
ruleFileBluePrint.removeRule(ruleToReplace.identifier());
rulesToAdd.forEach(ruleFileBluePrint::addRule);
}
String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint); String migratedRulesString = RuleFileFactory.buildRuleString(ruleFileBluePrint);
String migratedFilePath = ruleFile.getAbsolutePath(); String migratedFilePath = ruleFile.getAbsolutePath();
@ -40,4 +40,35 @@ public class RuleFileMigrator {
} }
} }
/**
 * Refreshes every rule of {@code ruleFileBluePrint} from {@code combinedBluePrint}.
 * <p>
 * For each rule currently listed by {@code ruleFileBluePrint}, the rules that {@code combinedBluePrint}
 * holds under the same identifier are looked up, the existing rule is removed and the looked-up rules
 * are added instead. If the combined blueprint has no rule for an identifier, the rule is removed
 * without a replacement.
 *
 * @param ruleFileBluePrint the blueprint that is modified in place
 * @param combinedBluePrint the blueprint the replacement rules are taken from
 */
private static void replaceRules(RuleFileBluePrint ruleFileBluePrint, RuleFileBluePrint combinedBluePrint) {

    for (BasicRule outdatedRule : ruleFileBluePrint.getAllRules()) {
        var identifier = outdatedRule.identifier();

        // Resolve the replacements first, then drop the outdated rule and insert them.
        List<BasicRule> replacements = combinedBluePrint.findRuleByIdentifier(identifier);
        ruleFileBluePrint.removeRule(identifier);
        for (BasicRule replacement : replacements) {
            ruleFileBluePrint.addRule(replacement);
        }
    }
}
/**
 * Swaps the rules stored under the CBI.7.x and CBI.16.x identifiers in {@code ruleFileBluePrint}.
 * <p>
 * For each identifier pair, both identifiers are removed from {@code ruleFileBluePrint}. For every side
 * where something was actually removed, the rules that {@code combinedBluePrint} holds under the
 * <em>partner</em> identifier are added. A rule file that never contained one side of a pair therefore
 * does not gain a rule for it.
 * <p>
 * NOTE(review): the swap is keyed only on which identifiers are present, so running it a second time on
 * a file that contained just one side of a pair would swap that rule back - confirm the migration is
 * only ever applied once per file.
 *
 * @param combinedBluePrint the blueprint the replacement rules are taken from
 * @param ruleFileBluePrint the blueprint that is modified in place
 */
private static void replaceRuleIdentifiers(RuleFileBluePrint combinedBluePrint, RuleFileBluePrint ruleFileBluePrint) {

    // An ordered list of pairs instead of Map.of(...): the iteration order of Map.of is unspecified, which
    // would make the order of the remove/add operations differ from run to run.
    List<Map.Entry<String, String>> identifierSwaps = List.of(Map.entry("CBI.7.0", "CBI.16.0"),
                                                              Map.entry("CBI.7.1", "CBI.16.1"),
                                                              Map.entry("CBI.7.2", "CBI.16.2"),
                                                              Map.entry("CBI.7.3", "CBI.16.3"));

    for (Map.Entry<String, String> swap : identifierSwaps) {
        RuleIdentifier ruleId = RuleIdentifier.fromString(swap.getKey());
        RuleIdentifier otherRuleId = RuleIdentifier.fromString(swap.getValue());

        // Each side is replaced by what the combined blueprint holds under the partner identifier.
        List<BasicRule> replacementsForRuleId = combinedBluePrint.findRuleByIdentifier(otherRuleId);
        List<BasicRule> replacementsForOtherRuleId = combinedBluePrint.findRuleByIdentifier(ruleId);

        // Remove both sides before adding anything, so a freshly added rule is never removed again
        // within the same pass.
        boolean removedRuleId = ruleFileBluePrint.removeRule(ruleId);
        boolean removedOtherRuleId = ruleFileBluePrint.removeRule(otherRuleId);

        if (removedRuleId) {
            replacementsForRuleId.forEach(ruleFileBluePrint::addRule);
        }
        if (removedOtherRuleId) {
            replacementsForOtherRuleId.forEach(ruleFileBluePrint::addRule);
        }
    }
}
} }

View File

@ -7,15 +7,19 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors; import java.util.stream.Collectors;
public record RuleFileBluePrint(String imports, String globals, String queries, List<RuleClass> ruleClasses) { public record RuleFileBluePrint(String imports, String globals, String queries, List<RuleClass> ruleClasses) {
public void removeRule(RuleIdentifier ruleIdentifier) { public boolean removeRule(RuleIdentifier ruleIdentifier) {
AtomicBoolean wasRemoved = new AtomicBoolean(false);
findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit()) findRuleClassByType(ruleIdentifier.type()).ifPresent(ruleClass -> ruleClass.findRuleUnitByInteger(ruleIdentifier.unit())
.ifPresent(ruleUnit -> { .ifPresent(ruleUnit -> {
ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier)); boolean removed = ruleUnit.rules().removeIf(rule -> rule.identifier().matches(ruleIdentifier));
wasRemoved.set(removed);
if (ruleUnit.rules().isEmpty()) { if (ruleUnit.rules().isEmpty()) {
ruleClass.ruleUnits().remove(ruleUnit); ruleClass.ruleUnits().remove(ruleUnit);
} }
@ -23,7 +27,7 @@ public record RuleFileBluePrint(String imports, String globals, String queries,
ruleClasses().remove(ruleClass); ruleClasses().remove(ruleClass);
} }
})); }));
return wasRemoved.get();
} }

View File

@ -315,58 +315,55 @@ rule "CBI.6.1: Do not redact Names and Addresses if vertebrate but also publishe
// Rule unit: CBI.7 // Rule unit: CBI.7
rule "CBI.7.0: Do not redact Names and Addresses if published information found in Section without tables" rule "CBI.7.0: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$section: Section(!hasTables(), $section: Section(containsString("et al."))
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
$section.getEntitiesOfType(List.of("CBI_author", "CBI_address")) entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(redactionEntity -> { .forEach(entity -> {
redactionEntity.skipWithReferences( entity.redact("CBI.7.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)");
"CBI.7.0", dictionary.recommendEverywhere(entity);
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.7.1: Do not redact Names and Addresses if published information found in same table row" rule "CBI.7.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() $section: Section(containsString("et al."))
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
$authorOrAddress.skipWithReferences("CBI.7.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress)); entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
end .forEach(entity -> {
entity.redact("CBI.7.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
rule "CBI.7.2: Do not redact PII if published information found in Section without tables" dictionary.recommendEverywhere(entity);
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.7.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.7.3: Do not redact PII if published information found in same table row" rule "CBI.7.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII")) FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y")
$cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList() $section: Section(containsString("et al."))
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then then
$pii.skipWithReferences("CBI.7.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii)); entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end end
rule "CBI.7.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.7.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end
// Rule unit: CBI.8 // Rule unit: CBI.8
rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.0: Redacted because Section contains must_redact entity"
@ -656,54 +653,56 @@ rule "CBI.15.1: Redact CBI_author and CBI_address if row contains \"determinatio
// Rule unit: CBI.16 // Rule unit: CBI.16
rule "CBI.16.0: Add CBI_author with \"et al.\" RegEx" rule "CBI.16.0: Do not redact Names and Addresses if published information found in Section without tables"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
$section: Section(containsString("et al.")) $section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $section.getEntitiesOfType(List.of("CBI_author", "CBI_address"))
.forEach(entity -> { .forEach(redactionEntity -> {
entity.redact("CBI.16.0", "Author found by \"et al\" regex", "Reg (EC) No 1107/2009 Art. 63 (2g)"); redactionEntity.skipWithReferences(
dictionary.recommendEverywhere(entity); "CBI.16.0",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.16.1: Add CBI_author with \"et al.\" RegEx (non vertebrate study)" rule "CBI.16.1: Do not redact Names and Addresses if published information found in same table row"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
not FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$section: Section(containsString("et al.")) $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $authorOrAddress.skipWithReferences("CBI.16.1", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $authorOrAddress));
.forEach(entity -> { end
entity.redact("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity); rule "CBI.16.2: Do not redact PII if published information found in Section without tables"
when
$section: Section(!hasTables(),
hasEntitiesOfType("published_information"),
hasEntitiesOfType("PII"))
then
$section.getEntitiesOfType("PII")
.forEach(redactionEntity -> {
redactionEntity.skipWithReferences(
"CBI.16.2",
"Published Information found in section",
$section.getEntitiesOfType("published_information")
);
}); });
end end
rule "CBI.16.2: Add CBI_author with \"et al.\" RegEx (vertebrate study)" rule "CBI.16.3: Do not redact PII if published information found in same table row"
agenda-group "LOCAL_DICTIONARY_ADDS"
when when
FileAttribute(label == "Vertebrate Study", value soundslike "Yes" || value.toLowerCase() == "y") $table: Table(hasEntitiesOfType("published_information"), hasEntitiesOfType("PII"))
$section: Section(containsString("et al.")) $cellsWithPublishedInformation: TableCell() from $table.streamTableCellsWhichContainType("published_information").toList()
$tableCell: TableCell(row == $cellsWithPublishedInformation.row) from $table.streamTableCells().toList()
$pii: TextEntity(type() == "PII", active()) from $tableCell.getEntities()
then then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section) $pii.skipWithReferences("CBI.16.3", "Published Information found in row", $table.getEntitiesOfTypeInSameRow("published_information", $pii));
.forEach(entity -> {
entity.redact("CBI.16.2", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.recommendEverywhere(entity);
});
end
rule "CBI.16.3: Add CBI_author with \"et al.\" RegEx"
agenda-group "LOCAL_DICTIONARY_ADDS"
when
$section: Section(containsString("et al."))
then
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.redact("CBI.16.3", "Author found by \"et al\" regex", "Article 4(1)(b), Regulation (EC) No 1049/2001 (Personal data)");
dictionary.recommendEverywhere(entity);
});
end end

View File

@ -25,10 +25,12 @@ public class RuleFileMigrationTest {
// Put your redaction service drools paths and dossier-templates paths both RM and DM here // Put your redaction service drools paths and dossier-templates paths both RM and DM here
static final List<String> ruleFileDirs = List.of( static final List<String> ruleFileDirs = List.of(
"/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools", //"/Users/maverickstuder/Documents/RedactManager/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools",
"/home/kschuettler/iqser/redaction/dossier-templates-v2", // "/Users/maverickstuder/Documents/RedactManager/dossier-templates-v2"
"/home/kschuettler/iqser/fforesight/dossier-templates-v2", "/Users/maverickstuder/Documents/PM"
"/home/kschuettler/iqser/business-logic");
);
@Test @Test