RED-10708: Tables as components in DM #583

Open
maverick.studer wants to merge 5 commits from RED-10708 into master
34 changed files with 2845 additions and 219 deletions

View File

@ -10,6 +10,7 @@ import java.util.Optional;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
@ -362,12 +363,15 @@ public class DocumentTree {
}
public void addEntityToGraph(TextEntity entity) {
public void addEntityToGraph(SemanticEntity entity) {
getRoot().getNode().addThisToEntityIfIntersects(entity);
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
EntityEnrichmentService.enrichEntity(entity, textBlock);
if (entity instanceof TextEntity textEntity) {
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
}
EntityCreationUtility.addToPages(entity);
EntityCreationUtility.addEntityToNodeEntitySets(entity);
@ -378,6 +382,7 @@ public class DocumentTree {
entity.computeRelations();
entity.notifyEntityInserted();
}

View File

@ -188,7 +188,7 @@ public interface IEntity {
*
* @return A set of references.
*/
default Set<TextEntity> references() {
default Set<SemanticEntity> references() {
return getMatchedRule().getReferences();
}

View File

@ -40,7 +40,7 @@ public final class MatchedRule implements Comparable<MatchedRule> {
boolean ignored;
@Builder.Default
Set<TextEntity> references = Collections.emptySet();
Set<SemanticEntity> references = Collections.emptySet();
/**

View File

@ -0,0 +1,130 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Data;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Common base for entities that are attached to the document graph.
 * <p>
 * Holds the state shared by all concrete entity kinds: the entity type, the (mutable) type name,
 * matched rules, manual overwrites, the pages and nodes the entity touches, its relations to other
 * entities, and the registered {@link EntityEventListener}s.
 */
@Data
@FieldDefaults(level = AccessLevel.PROTECTED)
@SuperBuilder
public abstract class SemanticEntity implements IEntity {

    final EntityType entityType;
    String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted

    @Builder.Default
    final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
    @Builder.Default
    final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();

    @Builder.Default
    Set<Page> pages = new HashSet<>();
    List<PositionOnPage> positionsOnPagePerPage;
    @Builder.Default
    List<SemanticNode> intersectingNodes = new LinkedList<>();
    SemanticNode deepestFullyContainingNode;

    @Builder.Default
    Map<SemanticEntity, Set<Relation>> relations = new HashMap<>();

    @Builder.Default
    Collection<EntityEventListener> entityEventListeners = new ArrayList<>();


    /**
     * @return true when this entity is of EntityType ENTITY or HINT
     */
    public boolean validEntityType() {

        return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
    }


    /**
     * @return true when this entity is {@link #active()} and {@link #validEntityType()} holds
     */
    @Override
    public boolean valid() {

        return active() && validEntityType();
    }


    /**
     * @param type the type name to compare against
     * @return true when {@link #type()} equals the given type
     */
    public boolean isType(String type) {

        return type().equals(type);
    }


    /**
     * @param types candidate type names
     * @return true when the given list contains {@link #type()}
     */
    public boolean isAnyType(List<String> types) {

        return types.contains(type());
    }


    /**
     * @param manualRedactionId the annotation id to look for
     * @return true when any of this entity's page positions carries exactly that id
     */
    public boolean matchesAnnotationId(String manualRedactionId) {

        return getPositionsOnPagePerPage().stream()
                .anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
    }


    /**
     * Records a node that this entity intersects.
     *
     * @param containingNode the node to append to {@link #getIntersectingNodes()}
     */
    public void addIntersectingNode(SemanticNode containingNode) {

        intersectingNodes.add(containingNode);
    }


    @Override
    public void addEntityEventListener(EntityEventListener listener) {

        entityEventListeners.add(listener);
    }


    @Override
    public void removeEntityEventListener(EntityEventListener listener) {

        entityEventListeners.remove(listener);
    }


    @Override
    public Collection<EntityEventListener> getEntityEventListeners() {

        return entityEventListeners;
    }


    /**
     * Computes the relations of this entity to other entities; the concrete strategy is defined by subclasses.
     */
    public abstract void computeRelations();


    /**
     * Detaches this entity from the graph.
     * <p>
     * Calls {@code remove("FINAL.0.0", "removed completely")}, removes this entity from the entity set of
     * every intersecting node, from the semantic-entity set of every page and from the relation map of every
     * related entity, and finally resets the node, page and relation references to fresh empty state.
     */
    public void removeFromGraph() {

        remove("FINAL.0.0", "removed completely");
        intersectingNodes.forEach(node -> node.getEntities().remove(this));
        pages.forEach(page -> page.getSemanticEntities().remove(this));
        intersectingNodes = new LinkedList<>();
        relations.keySet()
                .forEach(entity -> entity.getRelations().remove(this));
        // Same map implementation as the builder default, so the field never silently switches type.
        relations = new HashMap<>();
        deepestFullyContainingNode = null;
        pages = new HashSet<>();
    }

}

View File

@ -0,0 +1,102 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Semantic entity that stands for a whole {@link Table} node.
 * <p>
 * Equality and hash code are based on {@link #id} only.
 */
@Data
@SuperBuilder
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
public class TableEntity extends SemanticEntity {

    private static final String VALUE_PREFIX = "Table:";

    @EqualsAndHashCode.Include
    final String id;
    Table table;


    /**
     * Creates the entity for a table.
     *
     * @param table      the table node the entity wraps; also used to build the id from its text range
     * @param type       the type name stored on the entity and fed into the id
     * @param entityType the entity type stored on the entity, fed into the id and into the manual overwrite
     * @return the newly built entity
     */
    public static TableEntity initialEntityNode(Table table, String type, EntityType entityType) {

        String entityId = table.buildId(table.getTextRange(), type, entityType);
        ManualChangeOverwrite overwrite = new ManualChangeOverwrite(entityType);
        return TableEntity.builder()
                .table(table)
                .entityType(entityType)
                .type(type)
                .manualOverwrite(overwrite)
                .id(entityId)
                .build();
    }


    /**
     * @return the literal prefix {@code Table:} followed by the string form of the table's headline
     */
    @Override
    public String getValue() {

        return VALUE_PREFIX + table.getHeadline();
    }


    /**
     * @return the text range of the table's text block
     */
    @Override
    public TextRange getTextRange() {

        return table.getTextBlock().getTextRange();
    }


    /**
     * @return the type from the manual overwrite when present, otherwise the name of {@link NodeType#TABLE}
     */
    @Override
    public String type() {

        // NOTE(review): the fallback does not use the `type` field set in initialEntityNode - confirm this is intended.
        String fallbackType = NodeType.TABLE.toString();
        return getManualOverwrite().getType()
                .orElse(fallbackType);
    }


    /**
     * Returns one position per page the table covers; the list is computed on first access and then reused.
     *
     * @return the cached or freshly computed positions
     */
    @Override
    public List<PositionOnPage> getPositionsOnPagePerPage() {

        boolean alreadyComputed = positionsOnPagePerPage != null && !positionsOnPagePerPage.isEmpty();
        if (alreadyComputed) {
            return positionsOnPagePerPage;
        }

        Map<Page, List<Rectangle2D>> rectanglesByPage = table.getTextBlock().getPositionsPerPage(table.getTextRange());
        // NOTE(review): every page position receives the same id - verify this is fine for tables spanning several pages.
        positionsOnPagePerPage = rectanglesByPage.entrySet()
                .stream()
                .map(pageEntry -> new PositionOnPage(id, pageEntry.getKey(), pageEntry.getValue()))
                .toList();
        return positionsOnPagePerPage;
    }


    /**
     * @return the CSV rendering produced by the wrapped table
     */
    public String asCsv() {

        return table.asCsv();
    }


    @Override
    public void computeRelations() {
        // Intentionally empty: table entities do not compute relations yet.
    }

}

View File

@ -1,15 +1,11 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
@ -19,26 +15,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
* Represents a text entity within a document, characterized by its text range, type, entity type,
* and associated metadata like matched rules, pages, and engines.
*/
@Data
@Builder
@AllArgsConstructor
@SuperBuilder
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
public class TextEntity implements IEntity {
public class TextEntity extends SemanticEntity {
// primary key
@EqualsAndHashCode.Include
@ -48,13 +42,6 @@ public class TextEntity implements IEntity {
TextRange textRange;
@Builder.Default
Set<TextRange> duplicateTextRanges = new HashSet<>();
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
final EntityType entityType;
@Builder.Default
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
@Builder.Default
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
boolean dictionaryEntry;
boolean dossierDictionaryEntry;
@ -66,24 +53,12 @@ public class TextEntity implements IEntity {
String value;
String textBefore;
String textAfter;
@Builder.Default
Set<Page> pages = new HashSet<>();
List<PositionOnPage> positionsOnPagePerPage;
@Builder.Default
List<SemanticNode> intersectingNodes = new LinkedList<>();
SemanticNode deepestFullyContainingNode;
@Builder.Default
Map<TextEntity, Set<Relation>> relations = new HashMap<>();
@Builder.Default
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
return TextEntity.builder()
.id(buildId(node, textRange, type, entityType))
.id(node.buildId(textRange, type, entityType))
.type(type)
.entityType(entityType)
.textRange(textRange)
@ -110,19 +85,6 @@ public class TextEntity implements IEntity {
}
private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) {
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange);
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
rectanglesPerLinePerPage.values()
.stream()
.flatMap(Collection::stream)
.toList(),
type,
entityType.name());
}
public void addTextRange(TextRange textRange) {
duplicateTextRanges.add(textRange);
@ -143,44 +105,13 @@ public class TextEntity implements IEntity {
}
public boolean isType(String type) {
return type().equals(type);
}
public boolean isAnyType(List<String> types) {
return types.contains(type());
}
public void addIntersectingNode(SemanticNode containingNode) {
intersectingNodes.add(containingNode);
}
public String getValueWithLineBreaks() {
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
}
public void removeFromGraph() {
remove("FINAL.0.0", "removed completely");
intersectingNodes.forEach(node -> node.getEntities().remove(this));
pages.forEach(page -> page.getEntities().remove(this));
intersectingNodes = new LinkedList<>();
relations.keySet()
.forEach(entity -> entity.getRelations().remove(this));
relations = new HashedMap<>();
deepestFullyContainingNode = null;
pages = new HashSet<>();
}
@Override
public List<PositionOnPage> getPositionsOnPagePerPage() {
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
@ -215,6 +146,7 @@ public class TextEntity implements IEntity {
return textEntity.contains(this);
}
public boolean contains(TextEntity textEntity) {
if (this.textRange.contains(textEntity.getTextRange())) {
@ -239,7 +171,6 @@ public class TextEntity implements IEntity {
}
public boolean intersects(TextEntity textEntity) {
return this.textRange.intersects(textEntity.getTextRange()) //
@ -277,14 +208,6 @@ public class TextEntity implements IEntity {
notifyEntityUpdated();
}
public boolean matchesAnnotationId(String manualRedactionId) {
return getPositionsOnPagePerPage().stream()
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
}
@Override
public String toString() {
@ -316,21 +239,6 @@ public class TextEntity implements IEntity {
}
/**
* @return true when this entity is of EntityType ENTITY or HINT
*/
public boolean validEntityType() {
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
}
public boolean valid() {
return active() && validEntityType();
}
@Override
public String value() {
@ -339,41 +247,32 @@ public class TextEntity implements IEntity {
}
@Override
public void addEntityEventListener(EntityEventListener listener) {
entityEventListeners.add(listener);
}
@Override
public void removeEntityEventListener(EntityEventListener listener) {
entityEventListeners.remove(listener);
}
public void computeRelations() {
for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) {
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
if (textEntity.getTextRange().equals(this.getTextRange())) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
} else if (textEntity.containedBy(this)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
} else if (this.containedBy(textEntity)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
} else {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
}
this.getDeepestFullyContainingNode().getEntities()
.stream()
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
.map(semanticEntity -> (TextEntity) semanticEntity)
.forEach(textEntity -> {
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
if (textEntity.getTextRange().equals(this.getTextRange())) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
} else if (textEntity.containedBy(this)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
} else if (this.containedBy(textEntity)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
} else {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
}
}
});
}
}
}
}

View File

@ -7,6 +7,7 @@ import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -38,7 +39,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
DocumentTree documentTree;
@Builder.Default
Set<TextEntity> entities = new HashSet<>();
Set<SemanticEntity> entities = new HashSet<>();
Map<Page, Rectangle2D> bBoxCache;
@ -61,7 +62,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
@Override
public Map<Page, Rectangle2D> getBBox() {
public Map<Page, Rectangle2D>getBBox() {
if (bBoxCache == null) {
bBoxCache = GenericSemanticNode.super.getBBox();

View File

@ -5,6 +5,7 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -42,7 +43,7 @@ public class Page {
Footer footer;
@Builder.Default
Set<TextEntity> entities = new HashSet<>();
Set<SemanticEntity> semanticEntities = new HashSet<>();
@Builder.Default
Set<Image> images = new HashSet<>();

View File

@ -1,6 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -4,6 +4,7 @@ import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@ -17,12 +18,16 @@ import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
@ -74,7 +79,25 @@ public interface SemanticNode {
*
* @return Set of all Entities associated with this Node
*/
Set<TextEntity> getEntities();
Set<SemanticEntity> getEntities();
/**
 * Collects the entities of this node that are {@link TextEntity} instances into a new set.
 *
 * @return Set of all TextEntities associated with this Node
 */
default Set<TextEntity> getTextEntities() {

    return getEntities().stream()
            .filter(TextEntity.class::isInstance)
            .map(TextEntity.class::cast)
            .collect(Collectors.toSet());
}
/**
 * Collects the entities of this node that are {@link TableEntity} instances into a new set.
 *
 * @return Set of all TableEntities associated with this Node
 */
default Set<TableEntity> getTableEntities() {

    return getEntities().stream()
            .filter(TableEntity.class::isInstance)
            .map(TableEntity.class::cast)
            .collect(Collectors.toSet());
}
/**
@ -85,9 +108,9 @@ public interface SemanticNode {
*/
default Stream<TextEntity> streamValidEntities() {
return getEntities().stream()
return getTextEntities().stream()
.filter(IEntity::active)
.filter(TextEntity::validEntityType);
.filter(SemanticEntity::validEntityType);
}
@ -638,18 +661,18 @@ public interface SemanticNode {
* This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity.
* It sets the fields accordingly and recursively calls this function on all its children.
*
* @param textEntity RedactionEntity, which is being inserted into the graph
* @param entity RedactionEntity, which is being inserted into the graph
*/
default void addThisToEntityIfIntersects(TextEntity textEntity) {
default void addThisToEntityIfIntersects(SemanticEntity entity) {
TextBlock textBlock = getTextBlock();
if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
if (textBlock.containsTextRange(textEntity.getTextRange())) {
textEntity.setDeepestFullyContainingNode(this);
if (textBlock.getTextRange().intersects(entity.getTextRange())) {
if (textBlock.containsTextRange(entity.getTextRange())) {
entity.setDeepestFullyContainingNode(this);
}
textEntity.addIntersectingNode(this);
getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange())
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
entity.addIntersectingNode(this);
getDocumentTree().findIntersectingChildNodes(getTreeId(), entity.getTextRange())
.forEach(node -> node.addThisToEntityIfIntersects(entity));
}
}
@ -838,4 +861,17 @@ public interface SemanticNode {
return pages.size() == 1 && pages.contains(page);
}
/**
 * Builds an id from the positions this node reports for the given text range.
 * <p>
 * The pages, the flattened list of all rectangles on those pages, the type and the name of the
 * entity type are handed to {@link IdBuilder#buildId}.
 *
 * @param textRange  the text range whose positions are looked up on this node
 * @param type       the type name that goes into the id
 * @param entityType the entity type whose name goes into the id
 * @return the id produced by {@link IdBuilder}
 */
default String buildId(TextRange textRange, String type, EntityType entityType) {

    Map<Page, List<Rectangle2D>> positionsByPage = getPositionsPerPage(textRange);
    List<Rectangle2D> allRectangles = positionsByPage.values()
            .stream()
            .flatMap(Collection::stream)
            .toList();
    return IdBuilder.buildId(positionsByPage.keySet(), allRectangles, type, entityType.name());
}
}

View File

@ -15,6 +15,7 @@ import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
@ -48,7 +49,7 @@ public class Table implements SemanticNode {
TextBlock textBlock;
@Builder.Default
Set<TextEntity> entities = new HashSet<>();
Set<SemanticEntity> entities = new HashSet<>();
Map<Page, Rectangle2D> bBoxCache;
@ -109,7 +110,7 @@ public class Table implements SemanticNode {
.toList();
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -128,7 +129,7 @@ public class Table implements SemanticNode {
.toList();
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -146,7 +147,7 @@ public class Table implements SemanticNode {
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
.anyMatch(types::contains))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -167,7 +168,7 @@ public class Table implements SemanticNode {
return entityTypes.containsAll(types);
})
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -185,7 +186,7 @@ public class Table implements SemanticNode {
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
.noneMatch(types::contains))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -426,4 +427,30 @@ public class Table implements SemanticNode {
visitor.visit(this);
}
/**
 * Renders this table as CSV text, one line per table row.
 * <p>
 * For every cell the search text of its text block is used. Each line break inside a cell
 * (CRLF, a lone CR or a lone LF) is replaced by a single space and the result is trimmed, so a
 * cell can never break a CSV record. Cells containing a comma or a double quote are wrapped in
 * double quotes, with embedded double quotes doubled. Cells are separated by {@code ,} and every
 * row is terminated by {@code \n}.
 *
 * @return the CSV representation of the table
 */
public String asCsv() {

    StringBuilder sb = new StringBuilder();
    for (int row = 0; row < numberOfRows; row++) {
        for (int col = 0; col < numberOfCols; col++) {
            TableCell cell = getCell(row, col);
            // "\r\n?" also covers a bare carriage return, which would otherwise reach the output unquoted.
            String cellText = cell.getTextBlock().getSearchText().replaceAll("\\r\\n?|\\n", " ").trim();
            if (cellText.contains(",") || cellText.contains("\"")) {
                cellText = "\"" + cellText.replace("\"", "\"\"") + "\"";
            }
            sb.append(cellText);
            if (col < numberOfCols - 1) {
                sb.append(",");
            }
        }
        sb.append("\n");
    }
    return sb.toString();
}
}

View File

@ -5,6 +5,8 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -50,15 +52,15 @@ public class EntityCreationUtility {
}
public void addToPages(TextEntity entity) {
public void addToPages(SemanticEntity entity) {
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
entity.getPages().addAll(pages);
pages.forEach(page -> page.getEntities().add(entity));
pages.forEach(page -> page.getSemanticEntities().add(entity));
}
public void addEntityToNodeEntitySets(TextEntity entity) {
public void addEntityToNodeEntitySets(SemanticEntity entity) {
entity.getIntersectingNodes()
.forEach(node -> node.getEntities().add(entity));

View File

@ -20,12 +20,15 @@ public class EntityEnrichmentService {
/**
 * Fills in the value and the surrounding text of an entity from the given text block.
 * <p>
 * The value is the sub sequence of the text block at the entity's text range; the text after is
 * looked up from the end of that range and the text before from its start.
 *
 * @param entity    the entity to enrich; its value, textAfter and textBefore are set
 * @param textBlock the text block the texts are read from
 */
public void enrichEntity(TextEntity entity, TextBlock textBlock) {

    var entityRange = entity.getTextRange();

    String entityValue = textBlock.subSequence(entityRange).toString();
    entity.setValue(entityValue);

    String followingText = findTextAfter(entityRange.end(), textBlock);
    entity.setTextAfter(followingText);

    String precedingText = findTextBefore(entityRange.start(), textBlock);
    entity.setTextBefore(precedingText);
}
private String findTextAfter(int index, TextBlock textBlock) {
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
String textAfter = textBlock.subSequence(index, endOffset).toString();
if (!textAfter.isBlank()) {
@ -38,7 +41,9 @@ public class EntityEnrichmentService {
return "";
}
private String findTextBefore(int index, TextBlock textBlock) {
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
if (!textBefore.isBlank()) {
@ -51,13 +56,17 @@ public class EntityEnrichmentService {
return "";
}
/**
 * Splits the text at single spaces and drops the empty tokens that consecutive spaces produce.
 *
 * @param text the text to split
 * @return an unmodifiable list of the non-empty tokens, in their original order
 */
private List<String> splitToWordsAndRemoveEmptyWords(String text) {

    String[] tokens = text.split(" ");
    return Arrays.stream(tokens)
            .filter(token -> !token.isEmpty())
            .toList();
}
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
StringBuilder sb = new StringBuilder();
for (String word : words) {
sb.append(word).append(" ");
@ -66,7 +75,9 @@ public class EntityEnrichmentService {
return endWithSpace ? result + " " : result;
}
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
StringBuilder sb = new StringBuilder();
for (String word : words) {
sb.append(word).append(" ");

View File

@ -26,6 +26,9 @@ public class Component {
List<Entity> references;
@Builder.Default
ComponentFormat componentFormat = ComponentFormat.TEXT;
public boolean addReference(Entity entity) {

View File

@ -0,0 +1,7 @@
package com.iqser.red.service.redaction.v1.server.model.component;
/**
 * Format tag for a component's value.
 */
public enum ComponentFormat {
// Value used as the default for Component.componentFormat.
TEXT,
// NOTE(review): presumably marks values rendered as CSV (e.g. via Table.asCsv) - confirm against the producers.
CSV
}

View File

@ -36,6 +36,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
@ -70,11 +72,11 @@ public class EntityLogCreatorService {
ObservationRegistry observationRegistry;
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(TextEntity textEntity) {
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(SemanticEntity semanticEntity) {
return !(textEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|| textEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|| textEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
return !(semanticEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|| semanticEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|| semanticEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
}
@ -175,7 +177,7 @@ public class EntityLogCreatorService {
List<EntityLogEntry> entries = new ArrayList<>();
List<TextEntity> textEntities = document.getEntities()
List<SemanticEntity> semanticEntities = document.getEntities()
.stream()
.filter(entity -> !entity.getValue().isEmpty())
.filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval)
@ -190,7 +192,7 @@ public class EntityLogCreatorService {
.toList();
List<String> allIds = new ArrayList<>();
allIds.addAll(textEntities.stream()
allIds.addAll(semanticEntities.stream()
.flatMap(entity -> entity.getPositionsOnPagePerPage()
.stream()
.map(PositionOnPage::getId))
@ -204,7 +206,7 @@ public class EntityLogCreatorService {
Map<String, List<ManualChange>> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds);
textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
semanticEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode,
dossierTemplateId,
@ -219,19 +221,19 @@ public class EntityLogCreatorService {
}
public List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
public List<EntityLogEntry> toEntityLogEntries(SemanticEntity semanticEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
List<EntityLogEntry> entityLogEntries = new ArrayList<>();
// split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities
for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) {
for (PositionOnPage positionOnPage : semanticEntity.getPositionsOnPagePerPage()) {
List<Position> rectanglesPerLine = positionOnPage.getRectanglePerLine()
.stream()
.map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber()))
.toList();
EntityLogEntry entityLogEntry = createEntityLogEntry(textEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
EntityLogEntry entityLogEntry = createEntityLogEntry(semanticEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
// set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages
entityLogEntry.setId(positionOnPage.getId());
@ -317,12 +319,24 @@ public class EntityLogCreatorService {
}
/**
 * Routes entity log entry creation to the overload that matches the concrete entity type.
 *
 * @param entity                the semantic entity to convert; text and table entities are supported
 * @param analysisNumber        passed through unchanged to the type-specific overload
 * @param existingManualChanges passed through unchanged to the type-specific overload
 * @return the entry produced by the matching overload
 * @throws UnsupportedOperationException if the entity is neither a {@link TextEntity} nor a {@link TableEntity}
 */
private EntityLogEntry createEntityLogEntry(SemanticEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {

    if (entity instanceof TextEntity text) {
        return createEntityLogEntry(text, analysisNumber, existingManualChanges);
    }
    if (entity instanceof TableEntity table) {
        return createEntityLogEntry(table, analysisNumber, existingManualChanges);
    }
    // Anything else (including null, for which instanceof is false) has no matching overload.
    throw new UnsupportedOperationException("Wrong semantic entity type given for entity log entry creation!!");
}
private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
Set<String> referenceIds = new HashSet<>();
entity.references()
.stream()
.filter(TextEntity::active)
.filter(SemanticEntity::active)
.forEach(ref -> ref.getPositionsOnPagePerPage()
.forEach(pos -> referenceIds.add(pos.getId())));
@ -365,7 +379,42 @@ public class EntityLogCreatorService {
}
private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) {
/**
 * Builds the entity log entry for a table entity.
 *
 * @param tableEntity           the table entity to convert
 * @param analysisNumber        analysis number handed to {@code ManualChangeFactory} when converting the manual change log
 * @param existingManualChanges manual changes already known for this entry; merged with the entity's own changes
 * @return the populated entity log entry
 */
private EntityLogEntry createEntityLogEntry(TableEntity tableEntity, int analysisNumber, List<ManualChange> existingManualChanges) {

    // Gather the position IDs of all applied references of this entity.
    // NOTE(review): the TextEntity overload filters its references by active() rather than applied() - confirm the difference is intended.
    Set<String> referenceIds = new HashSet<>();
    for (var reference : tableEntity.references()) {
        if (!reference.applied()) {
            continue;
        }
        for (var position : reference.getPositionsOnPagePerPage()) {
            referenceIds.add(position.getId());
        }
    }

    EntryType entryType = buildEntryType(tableEntity);
    List<ManualChange> allManualChanges = ManualChangeFactory.toLocalManualChangeList(tableEntity.getManualOverwrite().getManualChangeLog(), true, analysisNumber);
    var containingNode = tableEntity.getDeepestFullyContainingNode();

    return EntityLogEntry.builder()
            .reason(tableEntity.buildReason())
            .legalBasis(tableEntity.legalBasis())
            .value(tableEntity.getValue())
            .type(tableEntity.type())
            // A manually overwritten section wins; otherwise the section string is derived from the containing node.
            .section(tableEntity.getManualOverwrite().getSection()
                             .orElse(this.buildSectionString(containingNode)))
            .containingNodeId(containingNode.getTreeId())
            .closestHeadline(containingNode.getHeadline().getTextBlock().getSearchText())
            .matchedRule(tableEntity.getMatchedRule().getRuleIdentifier().toString())
            .startOffset(tableEntity.getTextRange().start())
            .endOffset(tableEntity.getTextRange().end())
            // NOTE(review): engines are not populated for table entities; the call below is disabled - confirm this is intended.
            // .engines(getEngines(tableEntity.getEngines(), tableEntity.getManualOverwrite()))
            .reference(referenceIds)
            .manualChanges(ManualChangesUtils.mergeManualChanges(existingManualChanges, allManualChanges))
            .state(buildEntryState(tableEntity))
            .entryType(entryType)
            .paragraphPageIdx(determinePageParagraphIndex(tableEntity, entryType))
            .build();
}
private int determinePageParagraphIndex(SemanticEntity textEntity, EntryType entryType) {
int pageParagraphIdx = -1;
@ -414,7 +463,7 @@ public class EntityLogCreatorService {
public static EntryType buildEntryType(IEntity entity) {
if (entity instanceof TextEntity textEntity) {
if (entity instanceof SemanticEntity textEntity) {
return getEntryType(textEntity.getEntityType());
} else if (entity instanceof PrecursorEntity precursorEntity) {
if (precursorEntity.isRectangle()) {

View File

@ -18,8 +18,6 @@ import com.google.common.collect.Sets;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@ -146,9 +144,9 @@ public class ManualChangesApplicationService {
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
Sets.difference(currentIntersectingPages, newIntersectingPages)
.forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
.forEach(removedPage -> removedPage.getSemanticEntities().remove(entityToBeResized));
Sets.difference(newIntersectingPages, currentIntersectingPages)
.forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
.forEach(addedPage -> addedPage.getSemanticEntities().add(entityToBeResized));
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));

View File

@ -102,9 +102,9 @@ public class UnprocessedChangesService {
}
document.getEntities()
.forEach(textEntity -> {
.forEach(entity -> {
Set<String> processedIds = new HashSet<>();
for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) {
for (var positionsOnPerPage : entity.getPositionsOnPagePerPage()) {
if (processedIds.contains(positionsOnPerPage.getId())) {
continue;
}
@ -113,17 +113,18 @@ public class UnprocessedChangesService {
.stream()
.map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber()))
.collect(Collectors.toList());
unprocessedManualEntities.add(UnprocessedManualEntity.builder()
.annotationId(allAnnotationIds.stream()
.filter(textEntity::matchesAnnotationId)
.findFirst()
.orElse(""))
.textBefore(textEntity.getTextBefore())
.textAfter(textEntity.getTextAfter())
.section(textEntity.getManualOverwrite().getSection()
.orElse(textEntity.getDeepestFullyContainingNode().toString()))
.positions(positions)
.build());
UnprocessedManualEntity.UnprocessedManualEntityBuilder builder = UnprocessedManualEntity.builder()
.annotationId(allAnnotationIds.stream()
.filter(entity::matchesAnnotationId)
.findFirst()
.orElse(""))
.section(entity.getManualOverwrite().getSection()
.orElse(entity.getDeepestFullyContainingNode().toString()))
.positions(positions);
if (entity instanceof TextEntity textEntity) {
builder.textBefore(textEntity.getTextBefore()).textAfter(textEntity.getTextAfter());
}
unprocessedManualEntities.add(builder.build());
}
});

View File

@ -23,8 +23,11 @@ import java.util.stream.Stream;
import org.kie.api.runtime.KieSession;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.component.ComponentFormat;
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
@ -98,6 +101,22 @@ public class ComponentCreationService {
.value(value)
.valueDescription(valueDescription)
.references(new LinkedList<>(references))
.componentFormat(ComponentFormat.TEXT)
.build());
}
/**
 * Creates a component with an explicit format and inserts it into the rule session.
 *
 * @param ruleIdentifier   textual rule identifier, parsed via {@code RuleIdentifier.fromString}
 * @param name             name of the component
 * @param value            value of the component
 * @param valueDescription description stored alongside the value
 * @param references       entities the component is based on; they are also recorded as referenced
 * @param componentFormat  format the value is expressed in
 */
public void create(String ruleIdentifier, String name, String value, String valueDescription, Collection<Entity> references, ComponentFormat componentFormat) {

    // Record the entities as referenced before the component becomes visible to the session.
    referencedEntities.addAll(references);

    // The component receives its own LinkedList copy of the references.
    Component component = Component.builder()
            .matchedRule(RuleIdentifier.fromString(ruleIdentifier))
            .name(name)
            .value(value)
            .valueDescription(valueDescription)
            .references(new LinkedList<>(references))
            .componentFormat(componentFormat)
            .build();
    kieSession.insert(component);
}
@ -376,6 +395,27 @@ public class ComponentCreationService {
}
/**
 * Creates one component per given entity that is not yet referenced by another component.
 * <p>
 * The component value depends on the entity's containing node: the search text of a table cell,
 * the CSV rendering of a table (with format {@code CSV}), or otherwise the entity's own value.
 *
 * @param ruleIdentifier rule identifier stored on every created component
 * @param entities       candidate entities; already referenced ones are skipped
 */
public void createComponentForTables(String ruleIdentifier, Collection<Entity> entities) {

    // Filtering and sorting finish before the first component is created,
    // so entities referenced during this call do not affect the selection.
    List<Entity> unreferencedInOrder = entities.stream()
            .filter(candidate -> !referencedEntities.contains(candidate))
            .sorted(EntityComparators.first())
            .toList();

    for (Entity tableEntity : unreferencedInOrder) {
        String componentValue = tableEntity.getValue();
        ComponentFormat format = ComponentFormat.TEXT;

        SemanticNode node = tableEntity.getContainingNode();
        if (node instanceof TableCell cell) {
            // case for tables with 0 rows and 0 columns
            componentValue = cell.getTextBlock().getSearchText();
        } else if (node instanceof Table table) {
            componentValue = table.asCsv();
            format = ComponentFormat.CSV;
        }

        create(ruleIdentifier, tableEntity.getType(), componentValue, "Table Entity", List.of(tableEntity), format);
    }
}
/**
* Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is.
*

View File

@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -907,6 +909,34 @@ public class EntityCreationService {
}
/**
 * Returns the table entity of the given type for a document table, creating it when none exists yet.
 * <p>
 * A candidate entity is built first. If the document root already holds an equal {@link TableEntity}
 * of the same type, that existing entity is returned and the candidate is discarded. Otherwise the
 * candidate gets the entity listener attached and is added to the document graph.
 *
 * @param table      The table to base the table entity on.
 * @param type       The type of entity to create.
 * @param entityType The entity's classification.
 * @return An optional containing the existing or newly created {@link TableEntity}.
 */
public Optional<TableEntity> bySemanticNode(Table table, String type, EntityType entityType) {

    TableEntity candidate = TableEntity.initialEntityNode(table, type, entityType);

    Optional<TableEntity> existing = table.getDocumentTree().getRoot().getNode().getEntities()
            .stream()
            .filter(known -> known instanceof TableEntity)
            .filter(known -> known.equals(candidate))
            .filter(known -> known.type().equals(type))
            .map(TableEntity.class::cast)
            .findAny();

    if (existing.isEmpty()) {
        addListenerToEntity(candidate);
        table.getDocumentTree().addEntityToGraph(candidate);
        return Optional.of(candidate);
    }
    return existing;
}
/**
* Expands a text entity's start boundary based on a regex pattern match.
*
@ -978,7 +1008,8 @@ public class EntityCreationService {
if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) {
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
.stream()
.filter(e -> e.equals(entity) && e.type().equals(type))
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
.map(e -> (TextEntity) e)
.peek(e -> e.addEngines(engines))
.findAny();
if (optionalTextEntity.isEmpty()) {
@ -1419,7 +1450,7 @@ public class EntityCreationService {
.filter(e -> e.equals(entity))//
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
.findAny()
.ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node));
.ifPresent(e -> addDuplicateEntityToGraph((TextEntity) e, entity.getTextRange(), node));
} else {
addListenerToEntity(entity);
@ -1469,19 +1500,17 @@ public class EntityCreationService {
}
additionalIntersectingNode.getEntities().add(entityToDuplicate);
additionalIntersectingNode.getPages(newTextRange)
.forEach(page -> page.getEntities().add(entityToDuplicate));
.forEach(page -> page.getSemanticEntities().add(entityToDuplicate));
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
});
}
private void addListenerToEntity(TextEntity textEntity) {
private void addListenerToEntity(IEntity textEntity) {
if(kieSessionUpdater != null) {
if (kieSessionUpdater != null) {
textEntity.addEntityEventListener(kieSessionUpdater);
}
}
}

View File

@ -162,7 +162,7 @@ public class EntityFromPrecursorCreationService {
correctEntity.getIntersectingNodes()
.forEach(n -> n.getEntities().add(correctEntity));
correctEntity.getPages()
.forEach(page -> page.getEntities().add(correctEntity));
.forEach(page -> page.getSemanticEntities().add(correctEntity));
correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList());
correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry());

View File

@ -28,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
@ -128,9 +129,9 @@ public class EntityDroolsExecutionService {
}
});
for (TextEntity textEntity : document.getEntities()) {
textEntity.addEntityEventListener(kieSessionUpdater);
textEntity.notifyEntityInserted();
for (SemanticEntity semanticEntity : document.getEntities()) {
semanticEntity.addEntityEventListener(kieSessionUpdater);
semanticEntity.notifyEntityInserted();
}
document.getPages()

View File

@ -9,6 +9,8 @@ import org.kie.api.runtime.rule.FactHandle;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -50,14 +52,14 @@ public class KieSessionUpdater implements EntityEventListener {
private void handleOnEntityEvent(IEntity entity, Consumer<Object> consumer) {
if (entity instanceof TextEntity textEntity) {
updateIntersectingNodes(textEntity);
textEntity.getRelations().values()
if (entity instanceof SemanticEntity semanticEntity) {
updateIntersectingNodes(semanticEntity);
semanticEntity.getRelations().values()
.stream()
.flatMap(Collection::stream)
.forEach(consumer);
textEntity.getRelations().keySet()
.forEach(k -> k.getRelations().getOrDefault(textEntity, Collections.emptySet())
semanticEntity.getRelations().keySet()
.forEach(k -> k.getRelations().getOrDefault(semanticEntity, Collections.emptySet())
.forEach(consumer));
}
@ -71,9 +73,9 @@ public class KieSessionUpdater implements EntityEventListener {
}
private void updateIntersectingNodes(TextEntity textEntity) {
private void updateIntersectingNodes(SemanticEntity semanticEntity) {
textEntity.getIntersectingNodes()
semanticEntity.getIntersectingNodes()
.forEach(this::updateFactIfPresent);
}

View File

@ -37,6 +37,8 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@ -262,6 +264,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@Test
@SneakyThrows
public void testDoseMortalityExtraction() {
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
@ -269,9 +272,18 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
System.out.println("Finished structure analysis");
analyzeService.analyze(request);
AnalyzeResult analyze = analyzeService.analyze(request);
System.out.println("Finished analysis");
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
fileOutputStream.write(annotateResponse.getDocument());
}
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var doseMortality = componentLog.getComponentLogEntries()
.stream()

View File

@ -0,0 +1,90 @@
package com.iqser.red.service.redaction.v1.server;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.when;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.tenantcommons.TenantContext;
/**
 * Integration test for table components: analyzes a sample PDF with the table-specific entity and
 * component rule files and checks the stored component log and entity log for the expected entries.
 */
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
class TableComponentsIntegrationTest extends AbstractRedactionIntegrationTest {

    private static final String RULES = loadFromClassPath("drools/documine_flora_table_test.drl");
    private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_table_test_components.drl");
    private static final String DATE_FORMATS = loadFromClassPath("dateFormats.txt");


    /**
     * Stubs the rule, date-format and dictionary clients so the analysis runs against the test rule files.
     */
    @BeforeEach
    public void stubClients() {

        TenantContext.setTenantId("redaction");

        // Entity and component rules; the version is the current time in milliseconds on every setup.
        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(System.currentTimeMillis());
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(COMPONENT_RULES));
        when(dateFormatsClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis());
        when(dateFormatsClient.getDateFormats(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(DATE_FORMATS));

        loadDictionaryForTest();
        loadTypeForTest();
        loadNerForTest();

        // The dictionary version is stubbed once; the original repeated this identical stub a second time.
        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, null, true)).thenReturn(getTemplateDictionaryTypeResponse());
        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, null, true)).thenReturn(getDossierDictionaryTypeResponse());

        mockDictionaryCalls(null);

        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
    }


    /**
     * Runs structure analysis, analysis and annotation on the sample file, then asserts that a component
     * named "Table" and an entity log entry whose matched rule contains "T.0.0" were produced.
     */
    @Test
    void testTableComponentsCreation() throws IOException {

        AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");

        analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
        analyzeService.analyze(request);

        // The annotated document is written to the temporary directory; it is not asserted on.
        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
        String outputFileName = OsUtils.getTemporaryDirectory() + "/TableComponents.pdf";
        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
            fileOutputStream.write(annotateResponse.getDocument());
        }

        var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
        boolean tableComponentFound = componentLog.getComponentLogEntries()
                .stream()
                .anyMatch(entry -> "Table".equals(entry.getName()));
        assertTrue(tableComponentFound, "Expected table component 'Table' to be present in the component log");

        var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
        boolean tableEntityFound = entityLog.getEntityLogEntry()
                .stream()
                .anyMatch(entry -> entry.getMatchedRule() != null && entry.getMatchedRule().contains("T.0.0"));
        assertTrue(tableEntityFound, "Expected table entity creation ('T.0.0') to be present in the entity log");
    }

}

View File

@ -348,11 +348,11 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
.orElseThrow();
assertEquals(textEntity.getValue(), searchTerm);
assertTrue(pageNode.getEntities().contains(textEntity));
assertTrue(pageNode.getSemanticEntities().contains(textEntity));
assertTrue(document.getPages()
.stream()
.filter(page -> page != pageNode)
.noneMatch(page -> page.getEntities().contains(textEntity)));
.noneMatch(page -> page.getSemanticEntities().contains(textEntity)));
assertTrue(textEntity.getPages().contains(pageNode));
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
assertTrue(textEntity.getIntersectingNodes()

View File

@ -30,6 +30,7 @@ import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
@ -200,14 +201,14 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns);
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
for (TextEntity entity : document.getEntities()) {
for (TextEntity entity : document.getTextEntities()) {
var foundEntity = foundEntities.stream()
.filter(f -> f.getId().equals(entity.getId()))
.findFirst()
.get();
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
}
assert document.getEntities()
assert document.getTextEntities()
.stream()
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
assert foundEntities.stream()
@ -225,10 +226,10 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) {
for (Page page : document.getPages()) {
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
.stream()
.filter(entityNode -> !entityNode.removed())
.filter(TextEntity::applied)
.filter(SemanticEntity::applied)
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
.stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
@ -241,7 +242,7 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
}
for (Page page : document.getPages()) {
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
.stream()
.filter(entityNode -> !entityNode.removed())
.filter(entityNode -> !entityNode.applied())

View File

@ -83,7 +83,7 @@ public class TableTest extends BuildDocumentIntegrationTest {
file);
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA);
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getTextEntities(), Color.MAGENTA);
viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup));
}

View File

@ -22,7 +22,7 @@ public class Cbi11Test extends RulesIntegrationTest {
doAnalysis(document, Collections.emptyList());
List<String> authorNames = document.getEntities()
List<String> authorNames = document.getTextEntities()
.stream()
.map(Dictionary::splitIntoAuthorNames)
.flatMap(Collection::stream)

View File

@ -9,6 +9,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
@ -46,9 +47,9 @@ public class EntityVisualizationUtility {
private static List<ColoredRectangle> getEntityRectangles(Color color, Page page) {
return page.getEntities()
return page.getSemanticEntities()
.stream()
.map(TextEntity::getPositionsOnPagePerPage)
.map(SemanticEntity::getPositionsOnPagePerPage)
.flatMap(Collection::stream)
.filter(p -> p.getPage().equals(page))
.map(PositionOnPage::getRectanglePerLine)

View File

@ -460,7 +460,6 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
end
rule "DefaultComponents.999.0: Create components for all unmapped entities."
salience -999
when

View File

@ -0,0 +1,492 @@
package drools
import static java.lang.String.format;
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch;
import java.util.List;
import java.util.LinkedList;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.Collection;
import java.util.stream.Stream;
import java.util.Optional;
import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService;
import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
global ComponentCreationService componentCreationService
global ComponentMappingService componentMappingService
global RulesLogger logger
//------------------------------------ queries ------------------------------------
// Query matching every FileAttribute fact in the session; each match is bound to $fileAttribute.
query "getFileAttributes"
$fileAttribute: FileAttribute()
end
// Query matching every Component fact in the session; each match is bound to $component.
query "getComponents"
$component: Component()
end
//------------------------------------ table rules ------------------------------------
// Collects all Entity facts of type "Table" into one list and passes it to
// componentCreationService.createComponentForTables together with this rule's identifier.
// The List() pattern carries no constraint (unlike List(!isEmpty) used by other rules in this file),
// so the rule also matches when the collected list is empty.
rule "TableComponents.0.0: Create components for all table entities."
when
$tables: List() from collect (Entity(type == "Table"))
then
componentCreationService.createComponentForTables("TableComponents.0.0", $tables);
end
//------------------------------------ Default Components rules ------------------------------------
// Collects all "title" entities and passes them to firstOrElse with "" as the fallback value.
rule "StudyTitle.0.0: First Title found"
    when
        $titles: List() from collect (Entity(type == "title"))
    then
        componentCreationService.firstOrElse("StudyTitle.0.0", "Study_Title", $titles, "");
end
// Pairs a laboratory name with the nearest laboratory country found in the same containing node
// and creates a "Performing_Laboratory" component valued "<name>, <country>".
rule "PerformingLaboratory.1.0: Performing Laboratory name and country found in same section"
    when
        $labName: Entity(type == "laboratory_name", $section: containingNode)
        $labCountry: Entity(type == "laboratory_country", containingNode == $section)
        // Discard this pairing if another country in the same node starts strictly closer to the name.
        not Entity(
            type == "laboratory_country",
            containingNode == $section,
            Math.abs($labName.startOffset - startOffset) < Math.abs($labName.startOffset - $labCountry.startOffset)
        )
    then
        String nameAndCountry = $labName.getValue() + ", " + $labCountry.getValue();
        componentCreationService.create(
            "PerformingLaboratory.1.0",
            "Performing_Laboratory",
            nameAndCountry,
            "Laboratory name and country found!",
            List.of($labName, $labCountry)
        );
end
// Creates a "Performing_Laboratory" component from a laboratory name whose containing node
// holds no laboratory country entity.
rule "PerformingLaboratory.2.0: Performing Laboratory name but no country found in same section"
    when
        $labName: Entity(type == "laboratory_name", $section: containingNode)
        not Entity(type == "laboratory_country", containingNode == $section)
    then
        componentCreationService.create(
            "PerformingLaboratory.2.0",
            "Performing_Laboratory",
            $labName.getValue(),
            "Only laboratory name found!",
            List.of($labName)
        );
end
// Fallback: when no "Performing_Laboratory" component exists, create one with an empty value.
// The negative salience gives this rule lower priority than rules with the default salience.
rule "PerformingLaboratory.0.2: Performing Laboratory not found"
    salience -1
    when
        not Component(name == "Performing_Laboratory")
    then
        componentCreationService.create(
            "PerformingLaboratory.0.2",
            "Performing_Laboratory",
            "",
            "fallback"
        );
end
// Collects all "report_number" entities and passes them to firstOrElse with "" as the fallback value.
rule "ReportNumber.0.0: First Report number found"
    when
        $reportNumbers: List() from collect (Entity(type == "report_number"))
    then
        componentCreationService.firstOrElse("ReportNumber.0.0", "Report_Number", $reportNumbers, "");
end
// When at least one "glp_study" entity exists, creates the "GLP_Study" component with value "Yes"
// and the collected entities as references.
rule "GLPStudy.0.0: GLP Study found"
    when
        $glpEntities: List(!isEmpty) from collect(Entity(type == "glp_study"))
    then
        componentCreationService.create(
            "GLPStudy.0.0",
            "GLP_Study",
            "Yes",
            "Yes if present, No if not",
            $glpEntities
        );
end
// When no "glp_study" entity exists, creates the "GLP_Study" component with value "No".
rule "GLPStudy.1.0: GLP Study not found"
    when
        not Entity(type == "glp_study")
    then
        componentCreationService.create(
            "GLPStudy.1.0",
            "GLP_Study",
            "No",
            "Yes if present, No if not"
        );
end
// For an OECD guideline number/year pair, looks up the "description" column of the "GuidelineMapping"
// mapping and, when a match exists, creates the "Test_Guidelines_1" component from it.
// The rule only matches while no "Test_Guidelines_1" component fact exists.
// Fix: the component is now created under this rule's own identifier ("TestGuideline.0.1");
// it was previously attributed to "TestGuideline.0.0".
rule "TestGuideline.0.1: match OECD number and year with guideline mappings"
    salience 1
    when
        not Component(name == "Test_Guidelines_1")
        $guidelineNumber: Entity(type == "oecd_guideline_number", $number: value)
        $guidelineYear: Entity(type == "oecd_guideline_year", $year: value)
    then
        Optional<String> guidelineMatch = componentMappingService.from("GuidelineMapping")
            .where("number = " + $number)
            .where("year = " + $year)
            .select("description")
            .findAny();
        // Without a mapping for this pair nothing is created here.
        if (guidelineMatch.isPresent()) {
            componentCreationService.create(
                "TestGuideline.0.1",
                "Test_Guidelines_1",
                guidelineMatch.get(),
                "OECD Number and guideline year mapped!",
                List.of($guidelineNumber, $guidelineYear)
            );
        }
end
// Fallback for "Test_Guidelines_1": while no such component fact exists, uses the value of an
// "oecd_guideline" entity directly.
// Fix: the component is now created under this rule's own identifier ("TestGuideline.1.0");
// it was previously attributed to "TestGuideline.2.0", the identifier of a different rule.
rule "TestGuideline.1.0: no guideline mapping found"
    when
        not Component(name == "Test_Guidelines_1")
        $guideLine: Entity(type == "oecd_guideline")
    then
        componentCreationService.create(
            "TestGuideline.1.0",
            "Test_Guidelines_1",
            $guideLine.getValue(),
            "No Mapping for OECD number and year found, using fallback instead!",
            List.of($guideLine)
        );
end
// Collects all "epa_guideline" and "ec_guideline" entities and passes them to joining
// for the "Test_Guidelines_2" component.
rule "TestGuideline.2.0: All values of EPA guideline and EC guidelines"
    when
        $epaAndEcGuidelines: List() from collect (Entity(type == "epa_guideline" || type == "ec_guideline"))
    then
        componentCreationService.joining("TestGuideline.2.0", "Test_Guidelines_2", $epaAndEcGuidelines);
end
// Collects all "experimental_start_date" entities and hands them to convertDates
// for the "Experimental_Starting_Date" component.
rule "StartDate.0.0: All experimental start dates converted to dd/MM/yyyy"
    when
        $experimentStarts: List() from collect (Entity(type == "experimental_start_date"))
    then
        componentCreationService.convertDates("StartDate.0.0", "Experimental_Starting_Date", $experimentStarts);
end
// Collects all "experimental_end_date" entities and hands them to convertDates
// for the "Experimental_Completion_Date" component.
rule "CompletionDate.0.0: All experimental end dates converted to dd/MM/yyyy"
    when
        $experimentEnds: List() from collect (Entity(type == "experimental_end_date"))
    then
        componentCreationService.convertDates("CompletionDate.0.0", "Experimental_Completion_Date", $experimentEnds);
end
// Collects all "batch_number" entities and hands them to joiningUnique
// for the "Certificate_of_Analysis_Batch_Identification" component.
rule "AnalysisCertificate.0.0: Unique values of certificate of analysis batch identification"
    when
        $batchIds: List() from collect (Entity(type == "batch_number"))
    then
        componentCreationService.joiningUnique(
            "AnalysisCertificate.0.0",
            "Certificate_of_Analysis_Batch_Identification",
            $batchIds
        );
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "study_conclusion" entities and passes them to joiningFromFirstSectionOnly with a space separator.
rule "StudyConclusion.0.0: Study conclusion in first found section"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $conclusionEntities: List() from collect(Entity(type == "study_conclusion"))
    then
        componentCreationService.joiningFromFirstSectionOnly(
            "StudyConclusion.0.0",
            "Study_Conclusion",
            $conclusionEntities,
            " "
        );
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "guideline_deviation" entities and passes them to joining with a newline separator.
rule "GuidelineDeviation.0.0: Guideline deviation as sentences"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $deviations: List() from collect (Entity(type == "guideline_deviation"))
    then
        componentCreationService.joining(
            "GuidelineDeviation.0.0",
            "Deviation_from_the_Guideline",
            $deviations,
            "\n"
        );
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "species" entities and passes them to firstOrElse with "" as the fallback value.
rule "Species.0.0: First found species"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $speciesEntities: List() from collect (Entity(type == "species"))
    then
        componentCreationService.firstOrElse("Species.0.0", "Species", $speciesEntities, "");
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "strain" entities and passes them to firstOrElse with "" as the fallback value.
rule "Strain.0.0: First found strain"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $strainEntities: List() from collect (Entity(type == "strain"))
    then
        componentCreationService.firstOrElse("Strain.0.0", "Strain", $strainEntities, "");
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "ld50_value" entities and passes them to joiningUnique for "Conclusion_LD50_mg_per_kg".
rule "Conclusion.0.0: Unique values of Conclusion LD50"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $ld50Values: List() from collect (Entity(type == "ld50_value"))
    then
        componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $ld50Values);
end
// Applies when the "OECD Number" file attribute is one of the listed values and at least one
// "ld50_greater" entity exists: creates "Conclusion_LD50_Greater_than" with value "Greater than".
// Fix: the rule name was "Conclusion0.1.0" (missing dot after "Conclusion"); it now matches the
// identifier "Conclusion.1.0" used in the consequence.
rule "Conclusion.1.0: Greater than found"
    when
        $oecdNumber: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $conclusions: List(!isEmpty()) from collect (Entity(type == "ld50_greater"))
    then
        componentCreationService.create(
            "Conclusion.1.0",
            "Conclusion_LD50_Greater_than",
            "Greater than",
            "Entity of type 'ld50_greater' found",
            $conclusions
        );
end
// Applies when the "OECD Number" file attribute is one of the listed values and no "ld50_greater"
// entity exists: creates "Conclusion_LD50_Greater_than" with an empty value.
rule "Conclusion.1.1: Greater than not found"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        not Entity(type == "ld50_greater")
    then
        componentCreationService.create(
            "Conclusion.1.1",
            "Conclusion_LD50_Greater_than",
            "",
            "No entity of type 'ld50_greater' found"
        );
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "confidence_minimal" entities and passes them to joiningUnique for "Conclusion_Minimum_Confidence".
rule "Conclusion.2.0: Minimum confidence as unique values"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $minConfidences: List() from collect (Entity(type == "confidence_minimal"))
    then
        componentCreationService.joiningUnique("Conclusion.2.0", "Conclusion_Minimum_Confidence", $minConfidences);
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "confidence_maximal" entities and passes them to joiningUnique for "Conclusion_Maximum_Confidence".
rule "Conclusion.3.0: Maximum confidence as unique values"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $maxConfidences: List() from collect (Entity(type == "confidence_maximal"))
    then
        componentCreationService.joiningUnique("Conclusion.3.0", "Conclusion_Maximum_Confidence", $maxConfidences);
end
// For OECD Number "402": collects all "necropsy_findings" entities and passes them to
// joiningFromLongestSectionOnly with a space separator.
rule "Necropsy.0.0: Necropsy findings from longest section"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $findings: List() from collect (Entity(type == "necropsy_findings"))
    then
        componentCreationService.joiningFromLongestSectionOnly(
            "Necropsy.0.0",
            "Necropsy_Findings",
            $findings,
            " "
        );
end
// For OECD Number "403" or "436": collects all "necropsy_findings" entities and passes them to
// joining with a newline separator.
// Fix: the component is now created under this rule's own identifier ("Necropsy.0.1");
// it was previously attributed to "Necropsy.0.0", the identifier of a different rule.
rule "Necropsy.0.1: Necropsy findings joined with \n"
    when
        FileAttribute(label == "OECD Number", value == "403" || value == "436")
        $necropsies: List() from collect (Entity(type == "necropsy_findings"))
    then
        componentCreationService.joining("Necropsy.0.1", "Necropsy_Findings", $necropsies, "\n");
end
// For OECD Number "402": collects all "doses_(mg_kg_bw)" entities and passes them to joining
// with a space separator for "Doses_mg_per_kg_bw".
rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $doseEntities: List() from collect (Entity(type == "doses_(mg_kg_bw)"))
    then
        componentCreationService.joining("Necropsy.1.0", "Doses_mg_per_kg_bw", $doseEntities, " ");
end
// Applies when the "OECD Number" file attribute is "403" or "436": collects all "4h_exposure"
// entities and passes them to joining with a space separator.
// Fix: the component is now created under this rule's own identifier ("Necropsy.2.0");
// it was previously attributed to "Necropsy.3.0", which is not the name of this rule.
rule "Necropsy.2.0: Conducted with 4 hours of exposure as one block"
    when
        $oecdNumber: String() from List.of("403", "436")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $exposures: List() from collect (Entity(type == "4h_exposure"))
    then
        componentCreationService.joining(
            "Necropsy.2.0",
            "Conducted_with_4_Hours_of_Exposure",
            $exposures,
            " "
        );
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "study_design" entities and passes them to joining with a space separator.
rule "StudyDesign.0.0: Study design as one block"
    when
        $supportedOecd: String() from List.of("404", "405", "406", "428", "429", "438", "439", "474", "487")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $designEntities: List() from collect (Entity(type == "study_design"))
    then
        componentCreationService.joining("StudyDesign.0.0", "Study_Design", $designEntities, " ");
end
// Applies when the "OECD Number" file attribute is one of the listed values: collects all
// "results_and_conclusion" entities and passes them to joining with a space separator.
rule "Results.0.0: Results and conclusions as joined values"
    when
        $supportedOecd: String() from List.of("406", "428", "438", "439", "474", "487")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $resultEntities: List() from collect (Entity(type == "results_and_conclusion"))
    then
        componentCreationService.joining("Results.0.0", "Results_and_Conclusions", $resultEntities, " ");
end
// For OECD Number "402": collects all "weight_behavior_changes" entities and passes them to
// joining with a newline separator.
rule "WeightBehavior.0.0: Weight change behavior as sentences"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $behaviorChanges: List() from collect (Entity(type == "weight_behavior_changes"))
    then
        componentCreationService.joining(
            "WeightBehavior.0.0",
            "Weight_Behavior_Changes",
            $behaviorChanges,
            "\n"
        );
end
// For OECD Number "402": collects all "mortality_statement" entities and passes them to
// joining with a space separator.
rule "MortalityStatement.0.0: Mortality statements as one block"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $statements: List() from collect (Entity(type == "mortality_statement"))
    then
        componentCreationService.joining("MortalityStatement.0.0", "Mortality_Statement", $statements, " ");
end
// For OECD Number "403": collects all "clinical_observations" entities and passes them to
// joining with a newline separator for the "Clinical_Observations" component.
// Fix: the component is now created under this rule's own identifier ("ClinicalObservations.0.0");
// it was previously attributed to "MortalityStatement.0.0", the identifier of a different rule.
rule "ClinicalObservations.0.0: Clinical observations as sentences"
    when
        FileAttribute(label == "OECD Number", value == "403")
        $observations: List() from collect (Entity(type == "clinical_observations"))
    then
        componentCreationService.joining(
            "ClinicalObservations.0.0",
            "Clinical_Observations",
            $observations,
            "\n"
        );
end
// For OECD Number "403": collects all "bodyweight_changes" entities and passes them to
// joining with a newline separator.
rule "BodyWeight.0.0: Bodyweight changes as sentences"
    when
        FileAttribute(label == "OECD Number", value == "403")
        $bodyweightChanges: List() from collect (Entity(type == "bodyweight_changes"))
    then
        componentCreationService.joining("BodyWeight.0.0", "Body_Weight_Changes", $bodyweightChanges, "\n");
end
// Applies when the "OECD Number" file attribute is "404" or "405": collects all "detailing"
// entities and passes them to joining with a space separator.
rule "Detailing.0.0: Detailing of reported changes as one block"
    when
        $supportedOecd: String() from List.of("404", "405")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $detailEntities: List() from collect (Entity(type == "detailing"))
    then
        componentCreationService.joining(
            "Detailing.0.0",
            "Detailing_of_Reported_Changes",
            $detailEntities,
            " "
        );
end
// Applies when the "OECD Number" file attribute is "405" or "429" and at least one "sex" entity
// has a lower-cased value of "male" or "males": creates the "Sex" component with value "male".
rule "Sex.0.0: Male sex found"
    when
        $supportedOecd: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $maleEntities: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "male" || value.toLowerCase() == "males")))
    then
        componentCreationService.create(
            "Sex.0.0",
            "Sex",
            "male",
            "male sex found",
            $maleEntities
        );
end
// Applies when the "OECD Number" file attribute is "405" or "429" and at least one "sex" entity
// has a lower-cased value of "female" or "females": creates the "Sex" component with value "female".
// Fix: the component is now created under this rule's own identifier ("Sex.1.0");
// it was previously attributed to "Sex.0.0", the identifier of the male rule.
rule "Sex.1.0: Female sex found"
    when
        $oecdNumber: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $females: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "female" || value.toLowerCase() == "females")))
    then
        componentCreationService.create("Sex.1.0", "Sex", "female", "female sex found", $females);
end
// Applies when the "OECD Number" file attribute is "405" or "429": for a "number_of_animals"
// entity, creates the "Number_of_Animals" component from that entity's value.
rule "NumberOfAnimals.0.0: Number of animals found"
    when
        $supportedOecd: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $animalCount: Entity(type == "number_of_animals")
    then
        componentCreationService.create(
            "NumberOfAnimals.0.0",
            "Number_of_Animals",
            $animalCount.getValue(),
            "Number of animals found directly",
            $animalCount
        );
end
// Applies when the "OECD Number" file attribute is "405" or "429" and no "number_of_animals"
// entity exists: collects all "animal_number" entities and passes them to uniqueValueCount.
rule "NumberOfAnimals.1.0: Count unique occurences of animals"
    when
        $supportedOecd: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        not Entity(type == "number_of_animals")
        $animalNumbers: List() from collect (Entity(type == "animal_number"))
    then
        componentCreationService.uniqueValueCount("NumberOfAnimals.1.0", "Number_of_Animals", $animalNumbers);
end
// Applies when the "OECD Number" file attribute is "425": collects all "clinical_signs" entities
// and passes them to joining with a newline separator.
rule "ClinicalSigns.0.0: Clinical signs as sentences"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $signEntities: List() from collect (Entity(type == "clinical_signs"))
    then
        componentCreationService.joining("ClinicalSigns.0.0", "Clinical_Signs", $signEntities, "\n");
end
// Applies when the "OECD Number" file attribute is "425": collects all "dose_mortality" and
// "dose_mortality_dose" entities and passes them to joiningFromSameTableRow.
rule "DoseMortality.0.0: Dose mortality joined with dose from same table row"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $doseAndMortality: List() from collect (Entity(type == "dose_mortality" || type == "dose_mortality_dose"))
    then
        componentCreationService.joiningFromSameTableRow(
            "DoseMortality.0.0",
            "Dose_Mortality",
            $doseAndMortality
        );
end
// Applies when the "OECD Number" file attribute is "425": collects all "mortality" entities
// and passes them to joining with a space separator.
rule "Mortality.0.0: Mortality as one block"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $mortalityEntities: List() from collect (Entity(type == "mortality"))
    then
        componentCreationService.joining("Mortality.0.0", "Mortality", $mortalityEntities, " ");
end
// Applies when the "OECD Number" file attribute is "425": collects all "dosages" entities
// and passes them to firstOrElse with "" as the fallback value.
rule "Dosages.0.0: First found value of Dosages"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $dosageEntities: List() from collect (Entity(type == "dosages"))
    then
        componentCreationService.firstOrElse("Dosages.0.0", "Dosages", $dosageEntities, "");
end
// Applies when the "OECD Number" file attribute is "429": collects all "preliminary_test_results"
// entities and passes them to joining with a newline separator.
rule "PrelimResults.0.0: Preliminary test results as sentences"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $prelimEntities: List() from collect (Entity(type == "preliminary_test_results"))
    then
        componentCreationService.joining(
            "PrelimResults.0.0",
            "Preliminary_Test_Results",
            $prelimEntities,
            "\n"
        );
end
// Applies when the "OECD Number" file attribute is "429": collects all "test_results" entities
// and passes them to joining with a space separator.
rule "TestResults.0.0: Test results as one block"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $testResultEntities: List() from collect (Entity(type == "test_results"))
    then
        componentCreationService.joining("TestResults.0.0", "Test_Results", $testResultEntities, " ");
end
// Applies when the "OECD Number" file attribute is "429": collects all "positive_control" entities
// and passes them to joining with a space separator.
rule "PositiveControl.0.0: Was the definitive study conducted with positive control"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $controlEntities: List() from collect (Entity(type == "positive_control"))
    then
        componentCreationService.joining(
            "PositiveControl.0.0",
            "Was_the_definitive_study_conducted_with_positive_control",
            $controlEntities,
            " "
        );
end
// Applies when the "OECD Number" file attribute is "429": collects all "results_(main_study)"
// entities and passes them to joining with a space separator.
rule "MainResults.0.0: Results from main study as one block"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $mainStudyEntities: List() from collect (Entity(type == "results_(main_study)"))
    then
        componentCreationService.joining("MainResults.0.0", "Results_Main_Study", $mainStudyEntities, " ");
end
// Applies when the "OECD Number" file attribute is "429" and at least one "approach_used" entity
// exists: creates "What_was_the_approach_used" with value "Group".
rule "UsedApproach.0.0: Used approach found and mapped to 'Group'"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $approachEntities: List(!isEmpty()) from collect (Entity(type == "approach_used"))
    then
        componentCreationService.create(
            "UsedApproach.0.0",
            "What_was_the_approach_used",
            "Group",
            "'Group' when approach used is present, else 'Individual'",
            $approachEntities
        );
end
// Applies when the "OECD Number" file attribute is "429" and no "approach_used" entity exists:
// creates "What_was_the_approach_used" with value "Individual".
rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        not Entity(type == "approach_used")
    then
        componentCreationService.create(
            "UsedApproach.1.0",
            "What_was_the_approach_used",
            "Individual",
            "'Group' when approach used is present, else 'Individual'"
        );
end
// Catch-all with very low salience: when no "OECD Number" file attribute exists and at least one
// Entity fact is present, passes all entities to createComponentsForUnMappedEntities.
rule "DefaultComponents.999.0: Create components for all unmapped entities."
    salience -999
    when
        not FileAttribute(label == "OECD Number")
        $everyEntity: List(!isEmpty()) from collect (Entity())
    then
        componentCreationService.createComponentsForUnMappedEntities(
            "DefaultComponents.999.0",
            $everyEntity
        );
end
//------------------------------------ Component merging rules ------------------------------------
/*
rule "X.0.0: merge duplicate component references"
when
$first: Component()
$duplicate: Component(this != $first, name == $first.name, value == $first.value)
then
$first.getReferences().addAll($duplicate.getReferences());
retract($duplicate);
end
*/

View File

@ -68,6 +68,15 @@ query "getFileAttributes"
$fileAttribute: FileAttribute()
end
//------------------------------------ T rules ------------------------------------
// For every Table node, asks entityCreationService.bySemanticNode for an entity of type "Table"
// and, if one is returned, applies rule "T.0.0" to it.
rule "T.0.0: Create TableEntities for all Tables"
    when
        $tableNode: Table()
    then
        Optional<TableEntity> createdEntity = entityCreationService.bySemanticNode($tableNode, "Table", EntityType.ENTITY);
        createdEntity.ifPresent(entity -> entity.apply("T.0.0", "Table found.", "n-a"));
end
//------------------------------------ H rules ------------------------------------
// Rule unit: H.0