diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 9f605928..c8582faa 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,7 +12,7 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.86.0" +val layoutParserVersion = "0.91.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" @@ -67,6 +67,7 @@ dependencies { testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}") testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}") + testImplementation("com.knecon.fforesight:viewer-doc-processor:${layoutParserVersion}") testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") { exclude( group = "com.iqser.red.service", diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java index 100f3fe9..4d56c729 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -64,8 +65,7 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsStringsIgnoreCase(List strings) { - return IntStream.range(0, numberOfRows) 
- .boxed() + return IntStream.range(0, numberOfRows).boxed() .filter(row -> rowContainsStringsIgnoreCase(row, strings)) .flatMap(this::streamRow) .map(TableCell::getEntities) @@ -82,8 +82,11 @@ public class Table implements SemanticNode { */ public boolean rowContainsStringsIgnoreCase(Integer row, List strings) { - String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT); - return strings.stream().map(String::toLowerCase).allMatch(rowText::contains); + String rowText = streamRow(row).map(TableCell::getTextBlock) + .collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT); + return strings.stream() + .map(String::toLowerCase) + .allMatch(rowText::contains); } @@ -96,9 +99,13 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowHasHeaderAndValue(String header, String value) { - List vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); + List vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)) + .map(TableCell::getCol) + .toList(); return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream() - .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))).map(TableCell::getEntities).flatMap(Collection::stream); + .anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -111,9 +118,13 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List values) { - List colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList(); + List colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)) + 
.map(TableCell::getCol) + .toList(); return streamTableCells().filter(tableCellNode -> colsWithHeader.stream() - .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))).map(TableCell::getEntities).flatMap(Collection::stream); + .anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -126,16 +137,33 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsEntitiesOfType(List types) { - List rowsWithEntityOfType = getEntities().stream() - .filter(TextEntity::active) - .filter(redactionEntity -> types.stream().anyMatch(type -> type.equals(redactionEntity.type()))) - .map(TextEntity::getIntersectingNodes) - .filter(node -> node instanceof TableCell) - .map(node -> (TableCell) node) - .map(TableCell::getRow) - .toList(); + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .anyMatch(types::contains)) + .flatMap(this::streamRow) + .map(TableCell::getEntities) + .flatMap(Collection::stream); + } - return rowsWithEntityOfType.stream().flatMap(this::streamRow).map(TableCell::getEntities).flatMap(Collection::stream); + + /** + * Streams all entities in this table, that appear in a row, which contains at least one entity of each of the provided types. + * Ignores Entity with ignored == true or removed == true. + * + * @param types type strings to check whether a row contains an entity like them + * @return Stream of all entities in this table, that appear in a row, which contains at least one entity of each of the provided types. 
+ */ + public Stream streamEntitiesWhereRowContainsEntitiesOfEachType(List types) { + + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> { + Set entityTypes = streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .collect(Collectors.toSet()); + return entityTypes.containsAll(types); + }) + .flatMap(this::streamRow) + .map(TableCell::getEntities) + .flatMap(Collection::stream); } @@ -148,18 +176,43 @@ public class Table implements SemanticNode { */ public Stream streamEntitiesWhereRowContainsNoEntitiesOfType(List types) { - return IntStream.range(0, numberOfRows) - .boxed() - .filter(rowNumber -> streamRow(rowNumber).map(TableCell::getEntities) - .flatMap(Collection::stream) - .filter(TextEntity::active) - .noneMatch(entity -> types.contains(entity.type()))) + return IntStream.range(0, numberOfRows).boxed() + .filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type) + .noneMatch(types::contains)) .flatMap(this::streamRow) .map(TableCell::getEntities) .flatMap(Collection::stream); } + /** + * Streams all Entities in the given row. + * + * @param rowNumber the row number to look for + * @return stream of TextEntities occurring in row + */ + public Stream streamTextEntitiesInRow(int rowNumber) { + + return streamRow(rowNumber).map(TableCell::getEntities) + .flatMap(Collection::stream) + .filter(TextEntity::active); + } + + + /** + * Streams all Entities in the given col. + * + * @param colNumber the column number to look for + * @return stream of TextEntities occurring in column + */ + public Stream streamTextEntitiesInCol(int colNumber) { + + return streamCol(colNumber).map(TableCell::getEntities) + .flatMap(Collection::stream) + .filter(TextEntity::active); + } + + /** * Returns a TableCell at the provided row and column location. 
* @@ -173,7 +226,8 @@ public class Table implements SemanticNode { throw new IllegalArgumentException(format("row %d, col %d is out of bounds for number of rows of %d and number of cols %d", row, col, numberOfRows, numberOfCols)); } int idx = row * numberOfCols + col; - return (TableCell) documentTree.getEntryById(treeId).getChildren().get(idx).getNode(); + return (TableCell) documentTree.getEntryById(treeId).getChildren() + .get(idx).getNode(); } @@ -196,7 +250,7 @@ public class Table implements SemanticNode { */ public Stream streamTableCellsWhichContainType(String type) { - return streamTableCells().filter(tableCell -> tableCell.getEntities().stream().filter(TextEntity::active).anyMatch(entity -> entity.type().equals(type))); + return streamTableCells().filter(tableCell -> tableCell.hasEntitiesOfType(type)); } @@ -222,7 +276,8 @@ public class Table implements SemanticNode { */ public Stream streamCol(int col) { - return IntStream.range(0, numberOfRows).boxed().map(row -> getCell(row, col)); + return IntStream.range(0, numberOfRows).boxed() + .map(row -> getCell(row, col)); } @@ -234,7 +289,8 @@ public class Table implements SemanticNode { */ public Stream streamRow(int row) { - return IntStream.range(0, numberOfCols).boxed().map(col -> getCell(row, col)); + return IntStream.range(0, numberOfCols).boxed() + .map(col -> getCell(row, col)); } @@ -258,7 +314,8 @@ public class Table implements SemanticNode { */ public Stream streamHeadersForCell(int row, int col) { - return Stream.concat(streamRow(row), streamCol(col)).filter(TableCell::isHeader); + return Stream.concat(streamRow(row), streamCol(col)) + .filter(TableCell::isHeader); } @@ -348,7 +405,9 @@ public class Table implements SemanticNode { public TextBlock getTextBlock() { if (textBlock == null) { - textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector()); + textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf) + 
.map(SemanticNode::getLeafTextBlock) + .collect(new TextBlockCollector()); } return textBlock; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java new file mode 100644 index 00000000..fad981c5 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/TableTest.java @@ -0,0 +1,162 @@ +package com.iqser.red.service.redaction.v1.server.document.graph; + +import static com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility.ENTITY_LAYER; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.awt.Color; +import java.io.File; +import java.io.FileOutputStream; +import java.nio.file.Path; +import java.util.List; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.drools.io.ClassPathResource; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; +import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; +import 
com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; +import com.iqser.red.service.redaction.v1.server.utils.EntityVisualizationUtility; +import com.knecon.fforesight.service.viewerdoc.model.Visualizations; +import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService; +import com.knecon.fforesight.tenantcommons.TenantContext; + +import lombok.SneakyThrows; + +public class TableTest extends BuildDocumentIntegrationTest { + + private static final boolean DRAW_FILE = false; + + @Autowired + private EntityEnrichmentService entityEnrichmentService; + + private EntityCreationService entityCreationService; + + private static final String TYPE_1 = "type1"; + private static final String TYPE_2 = "type2"; + private static final String TYPE_3 = "type3"; + private static final String TYPE_4 = "type4"; + + private Table table; + + private Set entities; + + + @SneakyThrows + @BeforeEach + public void createTable() { + + entityCreationService = new EntityCreationService(entityEnrichmentService); + + String fileName = "files/Minimal Examples/BasicTable.pdf"; + + Document document = buildGraph(fileName); + + table = (Table) document.streamAllSubNodesOfType(NodeType.TABLE) + .findAny() + .orElseThrow(); + + entities = List.of(// + entityCreationService.byString("Cell11", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell21", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell31", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell41", TYPE_1, EntityType.ENTITY, document), + entityCreationService.byString("Cell51", TYPE_1, EntityType.ENTITY, document), + + entityCreationService.byString("Cell12", TYPE_2, EntityType.ENTITY, document), + entityCreationService.byString("Cell32", TYPE_2, EntityType.ENTITY, document), + entityCreationService.byString("Cell42", TYPE_2, EntityType.ENTITY, document), + + entityCreationService.byString("Cell23", TYPE_3, EntityType.ENTITY, 
document), + entityCreationService.byString("Cell53", TYPE_3, EntityType.ENTITY, document), + + entityCreationService.byString("Cell14", TYPE_4, EntityType.ENTITY, document), + entityCreationService.byString("Cell34", TYPE_4, EntityType.ENTITY, document)) + .stream() + .flatMap(Function.identity()) + .collect(Collectors.toSet()); + + if (DRAW_FILE) { + File file = new File("/tmp/" + Path.of(fileName).getFileName().toString()); + storageService.downloadTo(TenantContext.getTenantId(), + RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.VIEWER_DOCUMENT), + file); + ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null); + + var visualizationsOnPage = EntityVisualizationUtility.createVisualizationsOnPage(document.getEntities(), Color.MAGENTA); + + viewerDocumentService.addVisualizationsOnPage(file, + file, + Visualizations.builder() + .layer(ENTITY_LAYER) + .visualizationsOnPages(visualizationsOnPage) + .layerVisibilityDefaultValue(true) + .build()); + } + + } + + + @Test + public void testStreamEntitiesWhereRowContainsEntitiesOfType() { + + int type_2_count = table.getEntitiesOfType(TYPE_2).size(); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfType(List.of(TYPE_1)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfType(List.of(TYPE_1, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(2, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_4)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(0, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_3)) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(0, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of(TYPE_1, TYPE_3, TYPE_4)) + 
.filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(type_2_count, + table.streamEntitiesWhereRowContainsEntitiesOfEachType(List.of()) + .filter(textEntity -> textEntity.type().equals(TYPE_2)) + .count()); + + assertEquals(3, + table.streamTextEntitiesInRow(1) + .count()); + + assertEquals(2, + table.streamTextEntitiesInRow(4) + .count()); + + assertEquals(5, + table.streamTextEntitiesInCol(1) + .count()); + + assertEquals(3, + table.streamTextEntitiesInRow(3) + .count()); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java new file mode 100644 index 00000000..7fae90bc --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/EntityVisualizationUtility.java @@ -0,0 +1,61 @@ +package com.iqser.red.service.redaction.v1.server.utils; + +import java.awt.Color; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.pdfbox.cos.COSName; + +import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage; +import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; +import com.knecon.fforesight.service.viewerdoc.ContentStreams; +import com.knecon.fforesight.service.viewerdoc.model.ColoredRectangle; +import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage; + +import lombok.experimental.UtilityClass; + +@UtilityClass +public class EntityVisualizationUtility { + + public static final ContentStreams.Identifier ENTITY_LAYER = new 
ContentStreams.Identifier("Entities", COSName.getPDFName("KNECON_ENTITIES"), true); + + + public Map createVisualizationsOnPage(Collection entity, Color color) { + + Map visualizations = new HashMap<>(); + Set pages = entity.stream() + .map(TextEntity::getPages) + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + + pages.forEach(page -> visualizations.put(page.getNumber() - 1, buildVisualizationsOnPage(color, page))); + + return visualizations; + } + + + private static VisualizationsOnPage buildVisualizationsOnPage(Color color, Page page) { + + return VisualizationsOnPage.builder().coloredRectangles(getEntityRectangles(color, page)).build(); + } + + + private static List getEntityRectangles(Color color, Page page) { + + return page.getEntities() + .stream() + .map(TextEntity::getPositionsOnPagePerPage) + .flatMap(Collection::stream) + .filter(p -> p.getPage().equals(page)) + .map(PositionOnPage::getRectanglePerLine) + .flatMap(Collection::stream) + .map(r -> new ColoredRectangle(r, color, 1)) + .toList(); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java index f9eaa926..f4625012 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/utils/LayoutParsingRequestProvider.java @@ -34,6 +34,7 @@ public class LayoutParsingRequestProvider { .positionBlockFileStorageId(positionBlockFileStorageId) .pageFileStorageId(pageFileStorageId) .simplifiedTextStorageId(simplifiedTextStorageId).viewerDocumentStorageId(viewerDocumentStorageId) + .visualLayoutParsingFileId(Optional.empty()) .build(); } 
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl index 95e4bf27..b1fbf077 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/all_redact_manager_rules.drl @@ -157,18 +157,17 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() + $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) ); - }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -207,23 +206,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( + $authorOrAddress.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() ); - }); end @@ -250,22 +247,20 @@ rule "CBI.5.0: Redact 
Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() ); - }); end @@ -355,18 +350,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() + $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.8.1", - "must_redact entity found", + "Must_redact found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) + $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) ); - }); end @@ -448,7 +442,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index fdd704b7..aed445b2 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -100,18 +100,17 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithvertebrate: TableCell() from $table.streamTableCellsWhichContainType("vertebrate").toList() + $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || 
entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) ); - }); end rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" @@ -150,23 +149,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( + $authorOrAddress.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + 
$table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() ); - }); end @@ -193,22 +190,20 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $entity.applyWithReferences( "CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() ); - }); end @@ -230,18 +225,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() + $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.8.1", - "must_redact entity found", + "Must_redact found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) + $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) ); - }); end @@ -295,7 +289,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf new file mode 100644 index 00000000..f692fccb Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/BasicTable.pdf differ diff --git a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl index ec55a2e6..57124c27 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl +++ b/redaction-service-v1/rules-management/src/main/resources/all_redact_manager_rules.drl @@ -157,20 +157,20 @@ rule "CBI.3.0: Redacted because Section contains a vertebrate" rule "CBI.3.1: Redacted because table row contains a vertebrate" when - $table: Table(hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithvertebrate: TableCell() from 
$table.streamTableCellsWhichContainType("vertebrate").toList() + $tableCell: TableCell(row == $cellsWithvertebrate.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.3.1", "Vertebrate found", "Reg (EC) No 1107/2009 Art. 63 (2g)", - $table.getEntitiesOfTypeInSameRow("vertebrate", entity) + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress) ); - }); end + rule "CBI.3.2: Do not redact because Section does not contain a vertebrate" when $section: Section(!hasTables(), !hasEntitiesOfType("vertebrate"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) @@ -207,23 +207,21 @@ rule "CBI.4.0: Do not redact Names and Addresses if no_redaction_indicator is fo }); end -rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" +rule "CBI.4.1: Do not redact Names and Addresses if no_redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("vertebrate"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("vertebrate"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("vertebrate").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then 
- $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("vertebrate", "no-redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.skipWithReferences( + $authorOrAddress.skipWithReferences( "CBI.4.1", "Vertebrate but a no redaction indicator found", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("vertebrate", $authorOrAddress).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $authorOrAddress).stream()).toList() ); - }); end @@ -250,22 +248,20 @@ rule "CBI.5.0: Redact Names and Addresses if no_redaction_indicator but also red rule "CBI.5.1: Redact Names and Addresses if no_redaction_indicator but also redaction_indicator is found in table row" when - $table: Table(hasEntitiesOfType("no_redaction_indicator"), - hasEntitiesOfType("redaction_indicator"), - (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("no_redaction_indicator"), hasEntitiesOfType("redaction_indicator"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + TableCell($row: row) from $table.streamTableCellsWhichContainType("redaction_indicator").toList() + TableCell(row == $row) from $table.streamTableCellsWhichContainType("no_redaction_indicator").toList() + $tableCell: TableCell(row == $row) from $table.streamTableCells().toList() + $entity: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("redaction_indicator", "no_redaction_indicator")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $entity.applyWithReferences( 
"CBI.5.1", "no_redaction_indicator but also redaction_indicator found", "Reg (EC) No 1107/2009 Art. 63 (2g)", Stream.concat( - $table.getEntitiesOfTypeInSameRow("vertebrate", entity).stream(), - $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", entity).stream()).toList() + $table.getEntitiesOfTypeInSameRow("redaction_indicator", $entity).stream(), + $table.getEntitiesOfTypeInSameRow("no_redaction_indicator", $entity).stream()).toList() ); - }); end @@ -355,18 +351,17 @@ rule "CBI.8.0: Redacted because Section contains must_redact entity" rule "CBI.8.1: Redacted because table row contains must_redact entity" when - $table: Table(hasEntitiesOfType("must_redact"), (hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address"))) + $table: Table(hasEntitiesOfType("must_redact"), hasEntitiesOfType("CBI_author") || hasEntitiesOfType("CBI_address")) + $cellsWithMustRedact: TableCell() from $table.streamTableCellsWhichContainType("must_redact").toList() + $tableCell: TableCell(row == $cellsWithMustRedact.row) from $table.streamTableCells().toList() + $authorOrAddress: TextEntity(type() == "CBI_author" || type() == "CBI_address", active()) from $tableCell.getEntities() then - $table.streamEntitiesWhereRowContainsEntitiesOfType(List.of("must_redact")) - .filter(entity -> entity.getType().equals("CBI_author") || entity.getType().equals("CBI_address")) - .forEach(entity -> { - entity.applyWithReferences( + $authorOrAddress.applyWithReferences( "CBI.8.1", - "must_redact entity found", + "Must_redact found", "Reg (EC) No 1107/2009 Art. 
63 (2g)", - $table.getEntitiesOfTypeInSameRow("must_redact", entity) + $table.getEntitiesOfTypeInSameRow("must_redact", $authorOrAddress) ); - }); end @@ -448,7 +443,6 @@ rule "CBI.12.0: Redact and recommend TableCell with header 'Author' or 'Author(s TableCell(!header, containsAnyString("Yes", "Y"), $rowWithYes: row) from $table.streamCol($vertebrateCol).toList() $authorCell: TableCell(row == $rowWithYes) from $table.streamCol($authorCol).toList() then - entityCreationService.bySemanticNode($authorCell, "CBI_author", EntityType.ENTITY) .ifPresent(authorEntity -> { authorEntity.redact("CBI.12.0", "Redacted because it's row belongs to a vertebrate study", "Article 39(e)(3) of Regulation (EC) No 178/2002");