hotfix: streamEntitiesWhereRowContainsEntityOfType is broken

This commit is contained in:
Kilian Schüttler 2024-02-23 14:30:48 +01:00
parent fd3fe87d90
commit 5f67efc8c7
9 changed files with 405 additions and 141 deletions

View File

@ -12,7 +12,7 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.86.0"
val layoutParserVersion = "0.91.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
@ -67,6 +67,7 @@ dependencies {
testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}")
testImplementation("com.knecon.fforesight:viewer-doc-processor:${layoutParserVersion}")
testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") {
exclude(
group = "com.iqser.red.service",

View File

@ -9,6 +9,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
@ -64,8 +65,7 @@ public class Table implements SemanticNode {
*/
public Stream<TextEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
return IntStream.range(0, numberOfRows)
.boxed()
return IntStream.range(0, numberOfRows).boxed()
.filter(row -> rowContainsStringsIgnoreCase(row, strings))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
@ -82,8 +82,11 @@ public class Table implements SemanticNode {
*/
public boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings) {
String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
return strings.stream().map(String::toLowerCase).allMatch(rowText::contains);
String rowText = streamRow(row).map(TableCell::getTextBlock)
.collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
return strings.stream()
.map(String::toLowerCase)
.allMatch(rowText::contains);
}
@ -96,9 +99,13 @@ public class Table implements SemanticNode {
*/
public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
List<Integer> vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
List<Integer> vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header))
.map(TableCell::getCol)
.toList();
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))).map(TableCell::getEntities).flatMap(Collection::stream);
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
.map(TableCell::getEntities)
.flatMap(Collection::stream);
}
@ -111,9 +118,13 @@ public class Table implements SemanticNode {
*/
public Stream<TextEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
List<Integer> colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
List<Integer> colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header))
.map(TableCell::getCol)
.toList();
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))).map(TableCell::getEntities).flatMap(Collection::stream);
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
.map(TableCell::getEntities)
.flatMap(Collection::stream);
}
@ -126,16 +137,33 @@ public class Table implements SemanticNode {
*/
public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
List<Integer> rowsWithEntityOfType = getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> types.stream().anyMatch(type -> type.equals(redactionEntity.type())))
.map(TextEntity::getIntersectingNodes)
.filter(node -> node instanceof TableCell)
.map(node -> (TableCell) node)
.map(TableCell::getRow)
.toList();
return IntStream.range(0, numberOfRows).boxed()
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
.anyMatch(types::contains))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.flatMap(Collection::stream);
}
return rowsWithEntityOfType.stream().flatMap(this::streamRow).map(TableCell::getEntities).flatMap(Collection::stream);
/**
 * Streams the entities of every row that holds at least one active entity of each of the given types.
 * <p>
 * A row qualifies when the set of types of its active entities (see {@link #streamTextEntitiesInRow(int)}) contains all
 * of {@code types}. An empty {@code types} list therefore makes every row qualify.
 * For each qualifying row, all entities returned by {@link TableCell#getEntities()} of its cells are streamed; the
 * active filter is only applied when selecting rows, not to the returned entities.
 *
 * @param types type strings that must each be present among the active entities of a row
 * @return Stream of all entities in the cells of the rows which contain at least one active entity of each provided type.
 */
public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfEachType(List<String> types) {

    return IntStream.range(0, numberOfRows)
            // keep only the rows whose active entity types cover every requested type
            .filter(rowIndex -> streamTextEntitiesInRow(rowIndex)
                    .map(TextEntity::type)
                    .collect(Collectors.toSet())
                    .containsAll(types))
            .boxed()
            .flatMap(this::streamRow)
            .flatMap(cell -> cell.getEntities().stream());
}
@ -148,18 +176,43 @@ public class Table implements SemanticNode {
*/
public Stream<TextEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
return IntStream.range(0, numberOfRows)
.boxed()
.filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities)
.flatMap(Collection::stream)
.filter(TextEntity::active)
.noneMatch(entity -> types.contains(entity.type())))
return IntStream.range(0, numberOfRows).boxed()
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
.noneMatch(types::contains))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.flatMap(Collection::stream);
}
/**
 * Streams the active entities of all cells in the given row.
 * <p>
 * The cells are obtained via {@link #streamRow(int)}; entities for which {@link TextEntity#active()} is false are dropped.
 * No de-duplication is performed.
 *
 * @param rowNumber the row number to look for
 * @return stream of active TextEntities occurring in the row
 */
public Stream<TextEntity> streamTextEntitiesInRow(int rowNumber) {

    Stream<TableCell> cellsInRow = streamRow(rowNumber);
    return cellsInRow.flatMap(cell -> cell.getEntities().stream())
            .filter(entity -> entity.active());
}
/**
 * Streams the active entities of all cells in the given column.
 * <p>
 * The cells are obtained via {@link #streamCol(int)}; entities for which {@link TextEntity#active()} is false are dropped.
 * No de-duplication is performed.
 *
 * @param colNumber the column number to look for
 * @return stream of active TextEntities occurring in the column
 */
public Stream<TextEntity> streamTextEntitiesInCol(int colNumber) {

    Stream<TableCell> cellsInCol = streamCol(colNumber);
    return cellsInCol.flatMap(cell -> cell.getEntities().stream())
            .filter(entity -> entity.active());
}
/**
* Returns a TableCell at the provided row and column location.
*
@ -173,7 +226,8 @@ public class Table implements SemanticNode {
throw new IllegalArgumentException(format("row %d, col %d is out of bounds for number of rows of %d and number of cols %d", row, col, numberOfRows, numberOfCols));
}
int idx = row * numberOfCols + col;
return (TableCell) documentTree.getEntryById(treeId).getChildren().get(idx).getNode();
return (TableCell) documentTree.getEntryById(treeId).getChildren()
.get(idx).getNode();
}
@ -196,7 +250,7 @@ public class Table implements SemanticNode {
*/
public Stream<TableCell> streamTableCellsWhichContainType(String type) {
return streamTableCells().filter(tableCell -> tableCell.getEntities().stream().filter(TextEntity::active).anyMatch(entity -> entity.type().equals(type)));
return streamTableCells().filter(tableCell -> tableCell.hasEntitiesOfType(type));
}
@ -222,7 +276,8 @@ public class Table implements SemanticNode {
*/
public Stream<TableCell> streamCol(int col) {
return IntStream.range(0, numberOfRows).boxed().map(row -> getCell(row, col));
return IntStream.range(0, numberOfRows).boxed()
.map(row -> getCell(row, col));
}
@ -234,7 +289,8 @@ public class Table implements SemanticNode {
*/
public Stream<TableCell> streamRow(int row) {
return IntStream.range(0, numberOfCols).boxed().map(col -> getCell(row, col));
return IntStream.range(0, numberOfCols).boxed()
.map(col -> getCell(row, col));
}
@ -258,7 +314,8 @@ public class Table implements SemanticNode {
*/
public Stream<TableCell> streamHeadersForCell(int row, int col) {
return Stream.concat(streamRow(row), streamCol(col)).filter(TableCell::isHeader);
return Stream.concat(streamRow(row), streamCol(col))
.filter(TableCell::isHeader);
}
@ -348,7 +405,9 @@ public class Table implements SemanticNode {
public TextBlock getTextBlock() {
if (textBlock == null) {
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
.map(SemanticNode::getLeafTextBlock)
.collect(new TextBlockCollector());
}
return textBlock;
}

View File

@ -0,0 +1,162 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
import static com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility.ENTITY_LAYER;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.awt.Color;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.drools.io.ClassPathResource;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility;
import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
/**
 * Integration test for the row- and column-based entity streams of {@link Table}.
 * <p>
 * Before each test the graph of "BasicTable.pdf" is built, its table is looked up and entities of four synthetic types
 * are created by searching the document for cell texts of the form "CellRC".
 * NOTE(review): the expected counts below are consistent with "CellRC" sitting in row R, column C of the table —
 * confirm against the PDF.
 */
public class TableTest extends BuildDocumentIntegrationTest {

    /** Set to true to write a copy of the viewer document with all entities drawn on it to /tmp for debugging. */
    private static final boolean DRAW_FILE = false;

    private static final String TYPE_1 = "type1";
    private static final String TYPE_2 = "type2";
    private static final String TYPE_3 = "type3";
    private static final String TYPE_4 = "type4";

    @Autowired
    private EntityEnrichmentService entityEnrichmentService;
    private EntityCreationService entityCreationService;

    private Table table;
    private Set<TextEntity> entities;


    /**
     * Builds the document graph, picks a table from it and creates the test entities.
     */
    @SneakyThrows
    @BeforeEach
    public void createTable() {

        entityCreationService = new EntityCreationService(entityEnrichmentService);

        String fileName = "files/Minimal Examples/BasicTable.pdf";
        Document document = buildGraph(fileName);
        table = (Table) document.streamAllSubNodesOfType(NodeType.TABLE).findAny().orElseThrow();

        // The entity streams are consumed in declaration order by the flatMap below.
        var entityStreams = List.of(//
                entityCreationService.byString("Cell11", TYPE_1, EntityType.ENTITY, document),
                entityCreationService.byString("Cell21", TYPE_1, EntityType.ENTITY, document),
                entityCreationService.byString("Cell31", TYPE_1, EntityType.ENTITY, document),
                entityCreationService.byString("Cell41", TYPE_1, EntityType.ENTITY, document),
                entityCreationService.byString("Cell51", TYPE_1, EntityType.ENTITY, document),
                entityCreationService.byString("Cell12", TYPE_2, EntityType.ENTITY, document),
                entityCreationService.byString("Cell32", TYPE_2, EntityType.ENTITY, document),
                entityCreationService.byString("Cell42", TYPE_2, EntityType.ENTITY, document),
                entityCreationService.byString("Cell23", TYPE_3, EntityType.ENTITY, document),
                entityCreationService.byString("Cell53", TYPE_3, EntityType.ENTITY, document),
                entityCreationService.byString("Cell14", TYPE_4, EntityType.ENTITY, document),
                entityCreationService.byString("Cell34", TYPE_4, EntityType.ENTITY, document));
        entities = entityStreams.stream().flatMap(Function.identity()).collect(Collectors.toSet());

        if (DRAW_FILE) {
            drawEntityLayer(fileName, document);
        }
    }


    /**
     * Downloads the viewer document to /tmp and draws all entities of the document onto it in magenta.
     * Only used for manual debugging, see {@link #DRAW_FILE}.
     */
    @SneakyThrows
    private void drawEntityLayer(String fileName, Document document) {

        File file = new File("/tmp/" + Path.of(fileName).getFileName().toString());
        storageService.downloadTo(TenantContext.getTenantId(),
                RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.VIEWER_DOCUMENT),
                file);

        ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
        var visualizationsOnPage = EntityVisualizationUtility.createVisualizationsOnPage(document.getEntities(), Color.MAGENTA);
        var entityVisualizations = Visualizations.builder()
                .layer(ENTITY_LAYER)
                .visualizationsOnPages(visualizationsOnPage)
                .layerVisibilityDefaultValue(true)
                .build();
        // input and output are the same file, as in the original call
        viewerDocumentService.addVisualizationsOnPage(file, file, entityVisualizations);
    }


    /**
     * Checks the "any type" and "each type" row filters by counting the TYPE_2 entities they return, and checks the
     * per-row and per-column entity streams.
     */
    @Test
    public void testStreamEntitiesWhereRowContainsEntitiesOfType() {

        int type2Count = table.getEntitiesOfType(TYPE_2).size();

        // rows containing an entity of ANY of the given types
        assertEquals(type2Count, countType2InRowsWithAnyOf(List.of(TYPE_1)));
        assertEquals(type2Count, countType2InRowsWithAnyOf(List.of(TYPE_1, TYPE_4)));

        // rows containing an entity of EACH of the given types
        assertEquals(2, countType2InRowsWithEachOf(List.of(TYPE_1, TYPE_4)));
        assertEquals(0, countType2InRowsWithEachOf(List.of(TYPE_1, TYPE_3)));
        assertEquals(0, countType2InRowsWithEachOf(List.of(TYPE_1, TYPE_3, TYPE_4)));
        // an empty type list is satisfied by every row
        assertEquals(type2Count, countType2InRowsWithEachOf(List.of()));

        assertEquals(3, table.streamTextEntitiesInRow(1).count());
        assertEquals(2, table.streamTextEntitiesInRow(4).count());
        assertEquals(5, table.streamTextEntitiesInCol(1).count());
        assertEquals(3, table.streamTextEntitiesInRow(3).count());
    }


    /** Counts the TYPE_2 entities in rows that contain an entity of at least one of the given types. */
    private long countType2InRowsWithAnyOf(List<String> types) {

        return table.streamEntitiesWhereRowContainsEntitiesOfType(types)
                .filter(textEntity -> textEntity.type().equals(TYPE_2))
                .count();
    }


    /** Counts the TYPE_2 entities in rows that contain an entity of each of the given types. */
    private long countType2InRowsWithEachOf(List<String> types) {

        return table.streamEntitiesWhereRowContainsEntitiesOfEachType(types)
                .filter(textEntity -> textEntity.type().equals(TYPE_2))
                .count();
    }

}

View File

@ -0,0 +1,61 @@
package com.iqser.red.service.redaction.v1.server.utils;
import java.awt.Color;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.pdfbox.cos.COSName;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.knecon.fforesight.service.viewerdoc.ContentStreams;
import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle;
import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage;
import lombok.experimental.UtilityClass;
/**
 * Helper for turning entities into colored rectangles that can be drawn onto a viewer document.
 */
@UtilityClass
public class EntityVisualizationUtility {

    /** Identifier of the content stream layer the entity rectangles are drawn into. */
    public static final ContentStreams.Identifier ENTITY_LAYER = new ContentStreams.Identifier("Entities", COSName.getPDFName("KNECON_ENTITIES"), true);


    /**
     * Creates one {@link VisualizationsOnPage} per page that is touched by any of the given entities.
     * <p>
     * NOTE(review): the rectangles of a page are built from {@code page.getEntities()}, i.e. from all entities the page
     * reports, not only from the entities passed in — confirm this is intended.
     *
     * @param entity the entities whose pages should receive a visualization
     * @param color  the color used for every rectangle
     * @return map from {@code page.getNumber() - 1} (presumably the zero-based page index — confirm) to the
     * visualizations of that page
     */
    public Map<Integer, VisualizationsOnPage> createVisualizationsOnPage(Collection<TextEntity> entity, Color color) {

        Map<Integer, VisualizationsOnPage> visualizationsByPage = new HashMap<>();
        Set<Page> touchedPages = entity.stream()
                .flatMap(textEntity -> textEntity.getPages().stream())
                .collect(Collectors.toSet());

        for (Page page : touchedPages) {
            VisualizationsOnPage onPage = VisualizationsOnPage.builder()
                    .coloredRectangles(collectRectangles(page, color))
                    .build();
            visualizationsByPage.put(page.getNumber() - 1, onPage);
        }
        return visualizationsByPage;
    }


    /**
     * Collects the per-line rectangles of all entities of the page, restricted to the positions that lie on this page.
     */
    private static List<ColoredRectangle> collectRectangles(Page page, Color color) {

        return page.getEntities()
                .stream()
                .flatMap(textEntity -> textEntity.getPositionsOnPagePerPage().stream())
                // an entity may have positions on several pages, keep only those on this one
                .filter(position -> position.getPage().equals(page))
                .flatMap(position -> position.getRectanglePerLine().stream())
                .map(rectangle -> new ColoredRectangle(rectangle, color, 1))
                .toList();
    }

}

View File

@ -34,6 +34,7 @@ public class LayoutParsingRequestProvider {
.positionBlockFileStorageId(positionBlockFileStorageId)
.pageFileStorageId(pageFileStorageId)
.simplifiedTextStorageId(simplifiedTextStorageId).viewerDocumentStorageId(viewerDocumentStorageId)
.visualLayoutParsingFileId(Optional.empty())
.build();
}

View File

@ -157,18 +157,17 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate"
rule "CBI.3.1: Redacted because table row contains a vertebrate"
when
$table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList()
$tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.applyWithReferences(
$authorOrAddress.applyWithReferences(
"CBI.3.1",
"Vertebrate found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$table.getEntitiesOfTypeInSameRow("vertebrate", entity)
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress)
);
});
end
rule "CBI.3.2: Do not redact because Section does not contain a vertebrate"
@ -207,23 +206,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo
});
end
rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row"
rule "CBI.4.1: Redacted because table row contains a vertebrate"
when
$table: Table(hasEntitiesOfType("no_redaction_indicator"),
hasEntitiesOfType("vertebrate"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList()
TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList()
$tableCell: TableCell(row == $row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.skipWithReferences(
$authorOrAddress.skipWithReferences(
"CBI.4.1",
"Vertebrate but a no redaction indicator found",
Stream.concat(
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList()
);
});
end
@ -250,22 +247,20 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row"
when
$table: Table(hasEntitiesOfType("no_redaction_indicator"),
hasEntitiesOfType("redaction_indicator"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList()
TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList()
$tableCell: TableCell(row == $row) from $table.streamTableCells().toList()
$entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.applyWithReferences(
$entity.applyWithReferences(
"CBI.5.1",
"no_redaction_indicator but also redaction_indicator found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
Stream.concat(
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
$table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList()
);
});
end
@ -355,18 +350,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity"
rule "CBI.8.1: Redacted because table row contains must_redact entity"
when
$table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList()
$tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.applyWithReferences(
$authorOrAddress.applyWithReferences(
"CBI.8.1",
"must_redact entity found",
"Must_redact found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$table.getEntitiesOfTypeInSameRow("must_redact", entity)
$table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress)
);
});
end
@ -448,7 +442,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s
TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList()
$authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList()
then
entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY)
.ifPresent(authorEntity -> {
authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002");

View File

@ -100,18 +100,17 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate"
rule "CBI.3.1: Redacted because table row contains a vertebrate"
when
$table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList()
$tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.applyWithReferences(
$authorOrAddress.applyWithReferences(
"CBI.3.1",
"Vertebrate found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$table.getEntitiesOfTypeInSameRow("vertebrate", entity)
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress)
);
});
end
rule "CBI.3.2: Do not redact because Section does not contain a vertebrate"
@ -150,23 +149,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo
});
end
rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row"
rule "CBI.4.1: Redacted because table row contains a vertebrate"
when
$table: Table(hasEntitiesOfType("no_redaction_indicator"),
hasEntitiesOfType("vertebrate"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList()
TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList()
$tableCell: TableCell(row == $row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.skipWithReferences(
$authorOrAddress.skipWithReferences(
"CBI.4.1",
"Vertebrate but a no redaction indicator found",
Stream.concat(
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList()
);
});
end
@ -193,22 +190,20 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row"
when
$table: Table(hasEntitiesOfType("no_redaction_indicator"),
hasEntitiesOfType("redaction_indicator"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList()
TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList()
$tableCell: TableCell(row == $row) from $table.streamTableCells().toList()
$entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.applyWithReferences(
$entity.applyWithReferences(
"CBI.5.1",
"no_redaction_indicator but also redaction_indicator found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
Stream.concat(
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
$table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList()
);
});
end
@ -230,18 +225,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity"
rule "CBI.8.1: Redacted because table row contains must_redact entity"
when
$table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList()
$tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.applyWithReferences(
$authorOrAddress.applyWithReferences(
"CBI.8.1",
"must_redact entity found",
"Must_redact found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$table.getEntitiesOfTypeInSameRow("must_redact", entity)
$table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress)
);
});
end
@ -295,7 +289,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s
TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList()
$authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList()
then
entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY)
.ifPresent(authorEntity -> {
authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002");

View File

@ -157,20 +157,20 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate"
rule "CBI.3.1: Redacted because table row contains a vertebrate"
when
$table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
$cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList()
$tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
entity.applyWithReferences(
$authorOrAddress.applyWithReferences(
"CBI.3.1",
"Vertebrate found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
$table.getEntitiesOfTypeInSameRow("vertebrate", entity)
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress)
);
});
end
rule "CBI.3.2: Do not redact because Section does not contain a vertebrate"
when
$section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
@ -207,23 +207,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo
});
end
// CBI.4.1 as rendered by a diff view whose +/- markers were stripped: adjacent
// near-duplicate lines are two versions of the same statement, so this span is not a
// single compilable rule.
// NOTE(review): the second rule name below says "Redacted because table row contains a
// vertebrate", yet both consequence variants call skipWithReferences with the reason
// "Vertebrate but a no redaction indicator found". It matches the CBI.3.1 title text and
// looks copy-pasted - confirm which name is intended.
rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row"
rule "CBI.4.1: Redacted because table row contains a vertebrate"
when
// Two renderings of the same Table pattern (three-line form, then one-line form without
// the parentheses around the CBI_author / CBI_address disjunction).
$table: Table(hasEntitiesOfType("no_redaction_indicator"),
hasEntitiesOfType("vertebrate"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
// Binds $row from a cell returned for "vertebrate" and requires a cell returned for
// "no_redaction_indicator" with the same row value.
TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList()
TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList()
// Every cell with that row value, then each of its CBI_author / CBI_address entities for which active() holds.
$tableCell: TableCell(row == $row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
// Stream variant. NOTE(review): the second type string is spelled "no-redaction_indicator"
// (hyphen) while every other occurrence in this rule is "no_redaction_indicator" - likely a typo.
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
// Alternative receivers of the same call: lambda parameter vs. entity bound in the when-part.
entity.skipWithReferences(
$authorOrAddress.skipWithReferences(
"CBI.4.1",
"Vertebrate but a no redaction indicator found",
// References: the "vertebrate" and "no_redaction_indicator" entities of the target's
// row, concatenated into one list; each pair of lines is one variant.
Stream.concat(
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList()
);
// Closes the forEach lambda opened in the stream variant; the direct variant opens no lambda.
});
end
@ -250,22 +248,20 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red
// CBI.5.1 as rendered by a diff view whose +/- markers were stripped: adjacent
// near-duplicate lines are two versions of the same statement, so this span is not a
// single compilable rule.
// NOTE(review): confirm against the raw patch which line of each pair is the removed one.
rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row"
when
// Two renderings of the same Table pattern (three-line form, then one-line form without
// the parentheses around the CBI_author / CBI_address disjunction).
$table: Table(hasEntitiesOfType("no_redaction_indicator"),
hasEntitiesOfType("redaction_indicator"),
(hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
// Binds $row from a cell returned for "redaction_indicator" and requires a cell returned
// for "no_redaction_indicator" with the same row value.
TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList()
TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList()
// Every cell with that row value, then each of its CBI_author / CBI_address entities for which active() holds.
$tableCell: TableCell(row == $row) from $table.streamTableCells().toList()
$entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
// Stream variant: selects entities via the table helper and filters by type in the consequence.
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
// Alternative receivers of the same call: lambda parameter vs. $entity bound in the when-part.
entity.applyWithReferences(
$entity.applyWithReferences(
"CBI.5.1",
"no_redaction_indicator but also redaction_indicator found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
Stream.concat(
// NOTE(review): the first pair below looks up "vertebrate" references although this rule's
// conditions never mention that type; the second pair looks up "redaction_indicator",
// which matches the conditions and the reason text.
$table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList()
$table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(),
$table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList()
);
// Closes the forEach lambda opened in the stream variant; the direct variant opens no lambda.
});
end
@ -355,18 +351,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity"
// CBI.8.1 as rendered by a diff view whose +/- markers were stripped: adjacent
// near-duplicate lines are two versions of the same statement, so this span is not a
// single compilable rule.
// NOTE(review): confirm against the raw patch which line of each pair is the removed one.
rule "CBI.8.1: Redacted because table row contains must_redact entity"
when
// Two renderings of the same Table pattern; they differ only in the parentheses around
// the CBI_author / CBI_address disjunction.
$table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")))
$table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))
// One match per cell returned for type "must_redact", joined with every cell that has the
// same row value, then with each CBI_author / CBI_address entity of that cell for which active() holds.
$cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList()
$tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList()
$authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities()
then
// Stream variant: selects entities via the table helper and filters by type in the consequence.
$table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact"))
.filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address"))
.forEach(entity -> {
// Alternative receivers of the same call: lambda parameter vs. entity bound in the when-part.
entity.applyWithReferences(
$authorOrAddress.applyWithReferences(
"CBI.8.1",
// Two renderings of the reason string; they differ in wording and capitalisation.
"must_redact entity found",
"Must_redact found",
"Reg (EC) No 1107/2009 Art. 63 (2g)",
// Last argument: the "must_redact" entities of the same row as the target entity, once per variant.
$table.getEntitiesOfTypeInSameRow("must_redact", entity)
$table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress)
);
// Closes the forEach lambda opened in the stream variant; the direct variant opens no lambda.
});
end
@ -448,7 +443,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s
TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList()
$authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList()
then
entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY)
.ifPresent(authorEntity -> {
authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002");