RED-10708: Tables as components in DM #583
@ -10,6 +10,7 @@ import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
|
||||
@ -362,12 +363,15 @@ public class DocumentTree {
|
||||
}
|
||||
|
||||
|
||||
public void addEntityToGraph(TextEntity entity) {
|
||||
public void addEntityToGraph(SemanticEntity entity) {
|
||||
|
||||
getRoot().getNode().addThisToEntityIfIntersects(entity);
|
||||
|
||||
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
|
||||
EntityEnrichmentService.enrichEntity(entity, textBlock);
|
||||
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
|
||||
}
|
||||
|
||||
EntityCreationUtility.addToPages(entity);
|
||||
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
||||
@ -378,6 +382,7 @@ public class DocumentTree {
|
||||
|
||||
entity.computeRelations();
|
||||
entity.notifyEntityInserted();
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -188,7 +188,7 @@ public interface IEntity {
|
||||
*
|
||||
* @return A set of references.
|
||||
*/
|
||||
default Set<TextEntity> references() {
|
||||
default Set<SemanticEntity> references() {
|
||||
|
||||
return getMatchedRule().getReferences();
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
boolean ignored;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> references = Collections.emptySet();
|
||||
Set<SemanticEntity> references = Collections.emptySet();
|
||||
|
||||
|
||||
/**
|
||||
|
||||
@ -0,0 +1,130 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.collections4.map.HashedMap;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@FieldDefaults(level = AccessLevel.PROTECTED)
|
||||
@SuperBuilder
|
||||
public abstract class SemanticEntity implements IEntity {
|
||||
|
||||
final EntityType entityType;
|
||||
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
|
||||
|
||||
@Builder.Default
|
||||
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
@Builder.Default
|
||||
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
@Builder.Default
|
||||
Set<Page> pages = new HashSet<>();
|
||||
List<PositionOnPage> positionsOnPagePerPage;
|
||||
|
||||
@Builder.Default
|
||||
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
||||
SemanticNode deepestFullyContainingNode;
|
||||
|
||||
@Builder.Default
|
||||
Map<SemanticEntity, Set<Relation>> relations = new HashMap<>();
|
||||
|
||||
@Builder.Default
|
||||
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
||||
|
||||
|
||||
/**
|
||||
* @return true when this entity is of EntityType ENTITY or HINT
|
||||
*/
|
||||
public boolean validEntityType() {
|
||||
|
||||
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean valid() {
|
||||
|
||||
return active() && validEntityType();
|
||||
}
|
||||
|
||||
|
||||
public boolean isType(String type) {
|
||||
|
||||
return type().equals(type);
|
||||
}
|
||||
|
||||
|
||||
public boolean isAnyType(List<String> types) {
|
||||
|
||||
return types.contains(type());
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||
|
||||
return getPositionsOnPagePerPage().stream()
|
||||
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
}
|
||||
|
||||
|
||||
public void addIntersectingNode(SemanticNode containingNode) {
|
||||
|
||||
intersectingNodes.add(containingNode);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void addEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.add(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void removeEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.remove(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Collection<EntityEventListener> getEntityEventListeners() {
|
||||
|
||||
return entityEventListeners;
|
||||
}
|
||||
|
||||
|
||||
public abstract void computeRelations();
|
||||
|
||||
|
||||
public void removeFromGraph() {
|
||||
|
||||
remove("FINAL.0.0", "removed completely");
|
||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||
pages.forEach(page -> page.getSemanticEntities().remove(this));
|
||||
intersectingNodes = new LinkedList<>();
|
||||
relations.keySet()
|
||||
.forEach(entity -> entity.getRelations().remove(this));
|
||||
relations = new HashedMap<>();
|
||||
deepestFullyContainingNode = null;
|
||||
pages = new HashSet<>();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,102 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.collections4.map.HashedMap;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||
public class TableEntity extends SemanticEntity {
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
final String id;
|
||||
|
||||
Table table;
|
||||
|
||||
|
||||
public static TableEntity initialEntityNode(Table table, String type, EntityType entityType) {
|
||||
|
||||
return TableEntity.builder()
|
||||
.id(table.buildId(table.getTextRange(), type, entityType))
|
||||
.type(type)
|
||||
.entityType(entityType)
|
||||
.manualOverwrite(new ManualChangeOverwrite(entityType))
|
||||
.table(table)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getValue() {
|
||||
|
||||
return "Table:" + table.getHeadline();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextRange getTextRange() {
|
||||
|
||||
return table.getTextBlock().getTextRange();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String type() {
|
||||
|
||||
return getManualOverwrite().getType()
|
||||
.orElse(NodeType.TABLE.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||
|
||||
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = table.getTextBlock().getPositionsPerPage(table.getTextRange());
|
||||
|
||||
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet()
|
||||
.stream()
|
||||
.map(entry -> new PositionOnPage(id, entry.getKey(), entry.getValue()))
|
||||
.toList();
|
||||
}
|
||||
return positionsOnPagePerPage;
|
||||
}
|
||||
|
||||
|
||||
public String asCsv() {
|
||||
|
||||
return table.asCsv();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void computeRelations() {
|
||||
// NO - OP
|
||||
// can be implemented in the future
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,15 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.collections4.map.HashedMap;
|
||||
@ -19,26 +15,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
/**
|
||||
* Represents a text entity within a document, characterized by its text range, type, entity type,
|
||||
* and associated metadata like matched rules, pages, and engines.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@SuperBuilder
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||
@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
|
||||
public class TextEntity implements IEntity {
|
||||
public class TextEntity extends SemanticEntity {
|
||||
|
||||
// primary key
|
||||
@EqualsAndHashCode.Include
|
||||
@ -48,13 +42,6 @@ public class TextEntity implements IEntity {
|
||||
TextRange textRange;
|
||||
@Builder.Default
|
||||
Set<TextRange> duplicateTextRanges = new HashSet<>();
|
||||
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
|
||||
final EntityType entityType;
|
||||
|
||||
@Builder.Default
|
||||
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||
@Builder.Default
|
||||
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||
|
||||
boolean dictionaryEntry;
|
||||
boolean dossierDictionaryEntry;
|
||||
@ -66,24 +53,12 @@ public class TextEntity implements IEntity {
|
||||
String value;
|
||||
String textBefore;
|
||||
String textAfter;
|
||||
@Builder.Default
|
||||
Set<Page> pages = new HashSet<>();
|
||||
List<PositionOnPage> positionsOnPagePerPage;
|
||||
@Builder.Default
|
||||
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
||||
SemanticNode deepestFullyContainingNode;
|
||||
|
||||
@Builder.Default
|
||||
Map<TextEntity, Set<Relation>> relations = new HashMap<>();
|
||||
|
||||
@Builder.Default
|
||||
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
||||
|
||||
|
||||
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return TextEntity.builder()
|
||||
.id(buildId(node, textRange, type, entityType))
|
||||
.id(node.buildId(textRange, type, entityType))
|
||||
.type(type)
|
||||
.entityType(entityType)
|
||||
.textRange(textRange)
|
||||
@ -110,19 +85,6 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) {
|
||||
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange);
|
||||
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
|
||||
rectanglesPerLinePerPage.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.toList(),
|
||||
type,
|
||||
entityType.name());
|
||||
}
|
||||
|
||||
|
||||
public void addTextRange(TextRange textRange) {
|
||||
|
||||
duplicateTextRanges.add(textRange);
|
||||
@ -143,44 +105,13 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
public boolean isType(String type) {
|
||||
|
||||
return type().equals(type);
|
||||
}
|
||||
|
||||
|
||||
public boolean isAnyType(List<String> types) {
|
||||
|
||||
return types.contains(type());
|
||||
}
|
||||
|
||||
|
||||
public void addIntersectingNode(SemanticNode containingNode) {
|
||||
|
||||
intersectingNodes.add(containingNode);
|
||||
}
|
||||
|
||||
|
||||
public String getValueWithLineBreaks() {
|
||||
|
||||
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
|
||||
}
|
||||
|
||||
|
||||
public void removeFromGraph() {
|
||||
|
||||
remove("FINAL.0.0", "removed completely");
|
||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||
pages.forEach(page -> page.getEntities().remove(this));
|
||||
intersectingNodes = new LinkedList<>();
|
||||
relations.keySet()
|
||||
.forEach(entity -> entity.getRelations().remove(this));
|
||||
relations = new HashedMap<>();
|
||||
deepestFullyContainingNode = null;
|
||||
pages = new HashSet<>();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||
|
||||
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||
@ -215,6 +146,7 @@ public class TextEntity implements IEntity {
|
||||
return textEntity.contains(this);
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(TextEntity textEntity) {
|
||||
|
||||
if (this.textRange.contains(textEntity.getTextRange())) {
|
||||
@ -239,7 +171,6 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
|
||||
public boolean intersects(TextEntity textEntity) {
|
||||
|
||||
return this.textRange.intersects(textEntity.getTextRange()) //
|
||||
@ -277,14 +208,6 @@ public class TextEntity implements IEntity {
|
||||
notifyEntityUpdated();
|
||||
}
|
||||
|
||||
|
||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||
|
||||
return getPositionsOnPagePerPage().stream()
|
||||
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
@ -316,21 +239,6 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return true when this entity is of EntityType ENTITY or HINT
|
||||
*/
|
||||
public boolean validEntityType() {
|
||||
|
||||
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
|
||||
}
|
||||
|
||||
|
||||
public boolean valid() {
|
||||
|
||||
return active() && validEntityType();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String value() {
|
||||
|
||||
@ -339,41 +247,32 @@ public class TextEntity implements IEntity {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void addEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.add(listener);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void removeEntityEventListener(EntityEventListener listener) {
|
||||
|
||||
entityEventListeners.remove(listener);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void computeRelations() {
|
||||
|
||||
for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) {
|
||||
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||
if (textEntity.getTextRange().equals(this.getTextRange())) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
|
||||
} else if (textEntity.containedBy(this)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
|
||||
} else if (this.containedBy(textEntity)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
} else {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
}
|
||||
this.getDeepestFullyContainingNode().getEntities()
|
||||
.stream()
|
||||
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
|
||||
.map(semanticEntity -> (TextEntity) semanticEntity)
|
||||
.forEach(textEntity -> {
|
||||
|
||||
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||
if (textEntity.getTextRange().equals(this.getTextRange())) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
|
||||
} else if (textEntity.containedBy(this)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
|
||||
} else if (this.containedBy(textEntity)) {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
} else {
|
||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||
}
|
||||
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -7,6 +7,7 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
|
||||
@ -38,7 +39,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
|
||||
DocumentTree documentTree;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
Set<SemanticEntity> entities = new HashSet<>();
|
||||
|
||||
Map<Page, Rectangle2D> bBoxCache;
|
||||
|
||||
@ -61,7 +62,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
|
||||
|
||||
|
||||
@Override
|
||||
public Map<Page, Rectangle2D> getBBox() {
|
||||
public Map<Page, Rectangle2D>getBBox() {
|
||||
|
||||
if (bBoxCache == null) {
|
||||
bBoxCache = GenericSemanticNode.super.getBBox();
|
||||
|
||||
@ -5,6 +5,7 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
@ -42,7 +43,7 @@ public class Page {
|
||||
Footer footer;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
Set<SemanticEntity> semanticEntities = new HashSet<>();
|
||||
|
||||
@Builder.Default
|
||||
Set<Image> images = new HashSet<>();
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
|
||||
@ -4,6 +4,7 @@ import static java.lang.String.format;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@ -17,12 +18,16 @@ import java.util.stream.Stream;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
|
||||
|
||||
@ -74,7 +79,25 @@ public interface SemanticNode {
|
||||
*
|
||||
* @return Set of all Entities associated with this Node
|
||||
*/
|
||||
Set<TextEntity> getEntities();
|
||||
Set<SemanticEntity> getEntities();
|
||||
|
||||
|
||||
default Set<TextEntity> getTextEntities() {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
|
||||
.map(semanticEntity -> (TextEntity) semanticEntity)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
default Set<TableEntity> getTableEntities() {
|
||||
|
||||
return getEntities().stream()
|
||||
.filter(semanticEntity -> semanticEntity instanceof TableEntity)
|
||||
.map(semanticEntity -> (TableEntity) semanticEntity)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@ -85,9 +108,9 @@ public interface SemanticNode {
|
||||
*/
|
||||
default Stream<TextEntity> streamValidEntities() {
|
||||
|
||||
return getEntities().stream()
|
||||
return getTextEntities().stream()
|
||||
.filter(IEntity::active)
|
||||
.filter(TextEntity::validEntityType);
|
||||
.filter(SemanticEntity::validEntityType);
|
||||
}
|
||||
|
||||
|
||||
@ -638,18 +661,18 @@ public interface SemanticNode {
|
||||
* This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity.
|
||||
* It sets the fields accordingly and recursively calls this function on all its children.
|
||||
*
|
||||
* @param textEntity RedactionEntity, which is being inserted into the graph
|
||||
* @param entity RedactionEntity, which is being inserted into the graph
|
||||
*/
|
||||
default void addThisToEntityIfIntersects(TextEntity textEntity) {
|
||||
default void addThisToEntityIfIntersects(SemanticEntity entity) {
|
||||
|
||||
TextBlock textBlock = getTextBlock();
|
||||
if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
|
||||
if (textBlock.containsTextRange(textEntity.getTextRange())) {
|
||||
textEntity.setDeepestFullyContainingNode(this);
|
||||
if (textBlock.getTextRange().intersects(entity.getTextRange())) {
|
||||
if (textBlock.containsTextRange(entity.getTextRange())) {
|
||||
entity.setDeepestFullyContainingNode(this);
|
||||
}
|
||||
textEntity.addIntersectingNode(this);
|
||||
getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange())
|
||||
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
|
||||
entity.addIntersectingNode(this);
|
||||
getDocumentTree().findIntersectingChildNodes(getTreeId(), entity.getTextRange())
|
||||
.forEach(node -> node.addThisToEntityIfIntersects(entity));
|
||||
}
|
||||
}
|
||||
|
||||
@ -838,4 +861,17 @@ public interface SemanticNode {
|
||||
return pages.size() == 1 && pages.contains(page);
|
||||
}
|
||||
|
||||
|
||||
default String buildId(TextRange textRange, String type, EntityType entityType) {
|
||||
|
||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = getPositionsPerPage(textRange);
|
||||
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
|
||||
rectanglesPerLinePerPage.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.toList(),
|
||||
type,
|
||||
entityType.name());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -15,6 +15,7 @@ import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
@ -48,7 +49,7 @@ public class Table implements SemanticNode {
|
||||
TextBlock textBlock;
|
||||
|
||||
@Builder.Default
|
||||
Set<TextEntity> entities = new HashSet<>();
|
||||
Set<SemanticEntity> entities = new HashSet<>();
|
||||
|
||||
Map<Page, Rectangle2D> bBoxCache;
|
||||
|
||||
@ -109,7 +110,7 @@ public class Table implements SemanticNode {
|
||||
.toList();
|
||||
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
|
||||
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -128,7 +129,7 @@ public class Table implements SemanticNode {
|
||||
.toList();
|
||||
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
|
||||
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -146,7 +147,7 @@ public class Table implements SemanticNode {
|
||||
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
||||
.anyMatch(types::contains))
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -167,7 +168,7 @@ public class Table implements SemanticNode {
|
||||
return entityTypes.containsAll(types);
|
||||
})
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -185,7 +186,7 @@ public class Table implements SemanticNode {
|
||||
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
||||
.noneMatch(types::contains))
|
||||
.flatMap(this::streamRow)
|
||||
.map(TableCell::getEntities)
|
||||
.map(TableCell::getTextEntities)
|
||||
.flatMap(Collection::stream);
|
||||
}
|
||||
|
||||
@ -426,4 +427,30 @@ public class Table implements SemanticNode {
|
||||
visitor.visit(this);
|
||||
}
|
||||
|
||||
|
||||
public String asCsv() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (int row = 0; row < numberOfRows; row++) {
|
||||
for (int col = 0; col < numberOfCols; col++) {
|
||||
TableCell cell = getCell(row, col);
|
||||
String cellText = cell.getTextBlock().getSearchText().replaceAll("\\r?\\n", " ").trim();
|
||||
|
||||
if (cellText.contains(",") || cellText.contains("\"")) {
|
||||
cellText = "\"" + cellText.replace("\"", "\"\"") + "\"";
|
||||
}
|
||||
|
||||
sb.append(cellText);
|
||||
|
||||
if (col < numberOfCols - 1) {
|
||||
sb.append(",");
|
||||
}
|
||||
}
|
||||
sb.append("\n");
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -5,6 +5,8 @@ import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
@ -50,15 +52,15 @@ public class EntityCreationUtility {
|
||||
}
|
||||
|
||||
|
||||
public void addToPages(TextEntity entity) {
|
||||
public void addToPages(SemanticEntity entity) {
|
||||
|
||||
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
|
||||
entity.getPages().addAll(pages);
|
||||
pages.forEach(page -> page.getEntities().add(entity));
|
||||
pages.forEach(page -> page.getSemanticEntities().add(entity));
|
||||
}
|
||||
|
||||
|
||||
public void addEntityToNodeEntitySets(TextEntity entity) {
|
||||
public void addEntityToNodeEntitySets(SemanticEntity entity) {
|
||||
|
||||
entity.getIntersectingNodes()
|
||||
.forEach(node -> node.getEntities().add(entity));
|
||||
|
||||
@ -20,12 +20,15 @@ public class EntityEnrichmentService {
|
||||
|
||||
|
||||
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
|
||||
|
||||
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
|
||||
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
|
||||
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
|
||||
}
|
||||
|
||||
|
||||
private String findTextAfter(int index, TextBlock textBlock) {
|
||||
|
||||
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
|
||||
String textAfter = textBlock.subSequence(index, endOffset).toString();
|
||||
if (!textAfter.isBlank()) {
|
||||
@ -38,7 +41,9 @@ public class EntityEnrichmentService {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
private String findTextBefore(int index, TextBlock textBlock) {
|
||||
|
||||
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
|
||||
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
|
||||
if (!textBefore.isBlank()) {
|
||||
@ -51,13 +56,17 @@ public class EntityEnrichmentService {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
private List<String> splitToWordsAndRemoveEmptyWords(String text) {
|
||||
|
||||
return Arrays.stream(text.split(" "))
|
||||
.filter(word -> !Objects.equals("", word))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (String word : words) {
|
||||
sb.append(word).append(" ");
|
||||
@ -66,7 +75,9 @@ public class EntityEnrichmentService {
|
||||
return endWithSpace ? result + " " : result;
|
||||
}
|
||||
|
||||
|
||||
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (String word : words) {
|
||||
sb.append(word).append(" ");
|
||||
|
||||
@ -26,6 +26,9 @@ public class Component {
|
||||
|
||||
List<Entity> references;
|
||||
|
||||
@Builder.Default
|
||||
ComponentFormat componentFormat = ComponentFormat.TEXT;
|
||||
|
||||
|
||||
public boolean addReference(Entity entity) {
|
||||
|
||||
|
||||
@ -0,0 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.model.component;
|
||||
|
||||
public enum ComponentFormat {
|
||||
|
||||
TEXT,
|
||||
CSV
|
||||
}
|
||||
@ -36,6 +36,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
@ -70,11 +72,11 @@ public class EntityLogCreatorService {
|
||||
ObservationRegistry observationRegistry;
|
||||
|
||||
|
||||
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(TextEntity textEntity) {
|
||||
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(SemanticEntity semanticEntity) {
|
||||
|
||||
return !(textEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|
||||
|| textEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|
||||
|| textEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
|
||||
return !(semanticEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|
||||
|| semanticEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|
||||
|| semanticEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
|
||||
}
|
||||
|
||||
|
||||
@ -175,7 +177,7 @@ public class EntityLogCreatorService {
|
||||
|
||||
List<EntityLogEntry> entries = new ArrayList<>();
|
||||
|
||||
List<TextEntity> textEntities = document.getEntities()
|
||||
List<SemanticEntity> semanticEntities = document.getEntities()
|
||||
.stream()
|
||||
.filter(entity -> !entity.getValue().isEmpty())
|
||||
.filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval)
|
||||
@ -190,7 +192,7 @@ public class EntityLogCreatorService {
|
||||
.toList();
|
||||
|
||||
List<String> allIds = new ArrayList<>();
|
||||
allIds.addAll(textEntities.stream()
|
||||
allIds.addAll(semanticEntities.stream()
|
||||
.flatMap(entity -> entity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.map(PositionOnPage::getId))
|
||||
@ -204,7 +206,7 @@ public class EntityLogCreatorService {
|
||||
|
||||
Map<String, List<ManualChange>> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds);
|
||||
|
||||
textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
|
||||
semanticEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
|
||||
|
||||
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode,
|
||||
dossierTemplateId,
|
||||
@ -219,19 +221,19 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
public List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
|
||||
public List<EntityLogEntry> toEntityLogEntries(SemanticEntity semanticEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
|
||||
|
||||
List<EntityLogEntry> entityLogEntries = new ArrayList<>();
|
||||
|
||||
// split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities
|
||||
for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) {
|
||||
for (PositionOnPage positionOnPage : semanticEntity.getPositionsOnPagePerPage()) {
|
||||
|
||||
List<Position> rectanglesPerLine = positionOnPage.getRectanglePerLine()
|
||||
.stream()
|
||||
.map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber()))
|
||||
.toList();
|
||||
|
||||
EntityLogEntry entityLogEntry = createEntityLogEntry(textEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
|
||||
EntityLogEntry entityLogEntry = createEntityLogEntry(semanticEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
|
||||
|
||||
// set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages
|
||||
entityLogEntry.setId(positionOnPage.getId());
|
||||
@ -317,12 +319,24 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private EntityLogEntry createEntityLogEntry(SemanticEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
return createEntityLogEntry(textEntity, analysisNumber, existingManualChanges);
|
||||
} else if (entity instanceof TableEntity tableEntity) {
|
||||
return createEntityLogEntry(tableEntity, analysisNumber, existingManualChanges);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Wrong semantic entity type given for entity log entry creation!!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||
|
||||
Set<String> referenceIds = new HashSet<>();
|
||||
entity.references()
|
||||
.stream()
|
||||
.filter(TextEntity::active)
|
||||
.filter(SemanticEntity::active)
|
||||
.forEach(ref -> ref.getPositionsOnPagePerPage()
|
||||
.forEach(pos -> referenceIds.add(pos.getId())));
|
||||
|
||||
@ -365,7 +379,42 @@ public class EntityLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) {
|
||||
private EntityLogEntry createEntityLogEntry(TableEntity tableEntity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||
|
||||
Set<String> referenceIds = new HashSet<>();
|
||||
tableEntity.references()
|
||||
.stream()
|
||||
.filter(IEntity::applied)
|
||||
.forEach(ref -> ref.getPositionsOnPagePerPage()
|
||||
.forEach(pos -> referenceIds.add(pos.getId())));
|
||||
|
||||
EntryType entryType = buildEntryType(tableEntity);
|
||||
|
||||
List<ManualChange> allManualChanges = ManualChangeFactory.toLocalManualChangeList(tableEntity.getManualOverwrite().getManualChangeLog(), true, analysisNumber);
|
||||
|
||||
return EntityLogEntry.builder()
|
||||
.reason(tableEntity.buildReason())
|
||||
.legalBasis(tableEntity.legalBasis())
|
||||
.value(tableEntity.getValue())
|
||||
.type(tableEntity.type())
|
||||
.section(tableEntity.getManualOverwrite().getSection()
|
||||
.orElse(this.buildSectionString(tableEntity.getDeepestFullyContainingNode())))
|
||||
.containingNodeId(tableEntity.getDeepestFullyContainingNode().getTreeId())
|
||||
.closestHeadline(tableEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText())
|
||||
.matchedRule(tableEntity.getMatchedRule().getRuleIdentifier().toString())
|
||||
.startOffset(tableEntity.getTextRange().start())
|
||||
.endOffset(tableEntity.getTextRange().end())
|
||||
// .engines(getEngines(tableEntity.getEngines(), tableEntity.getManualOverwrite()))
|
||||
.reference(referenceIds)
|
||||
.manualChanges(ManualChangesUtils.mergeManualChanges(existingManualChanges, allManualChanges))
|
||||
.state(buildEntryState(tableEntity))
|
||||
.entryType(entryType)
|
||||
.paragraphPageIdx(determinePageParagraphIndex(tableEntity, entryType))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private int determinePageParagraphIndex(SemanticEntity textEntity, EntryType entryType) {
|
||||
|
||||
int pageParagraphIdx = -1;
|
||||
|
||||
@ -414,7 +463,7 @@ public class EntityLogCreatorService {
|
||||
|
||||
public static EntryType buildEntryType(IEntity entity) {
|
||||
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
if (entity instanceof SemanticEntity textEntity) {
|
||||
return getEntryType(textEntity.getEntityType());
|
||||
} else if (entity instanceof PrecursorEntity precursorEntity) {
|
||||
if (precursorEntity.isRectangle()) {
|
||||
|
||||
@ -18,8 +18,6 @@ import com.google.common.collect.Sets;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
@ -146,9 +144,9 @@ public class ManualChangesApplicationService {
|
||||
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
|
||||
|
||||
Sets.difference(currentIntersectingPages, newIntersectingPages)
|
||||
.forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
|
||||
.forEach(removedPage -> removedPage.getSemanticEntities().remove(entityToBeResized));
|
||||
Sets.difference(newIntersectingPages, currentIntersectingPages)
|
||||
.forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
|
||||
.forEach(addedPage -> addedPage.getSemanticEntities().add(entityToBeResized));
|
||||
|
||||
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
|
||||
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));
|
||||
|
||||
@ -102,9 +102,9 @@ public class UnprocessedChangesService {
|
||||
}
|
||||
|
||||
document.getEntities()
|
||||
.forEach(textEntity -> {
|
||||
.forEach(entity -> {
|
||||
Set<String> processedIds = new HashSet<>();
|
||||
for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) {
|
||||
for (var positionsOnPerPage : entity.getPositionsOnPagePerPage()) {
|
||||
if (processedIds.contains(positionsOnPerPage.getId())) {
|
||||
continue;
|
||||
}
|
||||
@ -113,17 +113,18 @@ public class UnprocessedChangesService {
|
||||
.stream()
|
||||
.map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber()))
|
||||
.collect(Collectors.toList());
|
||||
unprocessedManualEntities.add(UnprocessedManualEntity.builder()
|
||||
.annotationId(allAnnotationIds.stream()
|
||||
.filter(textEntity::matchesAnnotationId)
|
||||
.findFirst()
|
||||
.orElse(""))
|
||||
.textBefore(textEntity.getTextBefore())
|
||||
.textAfter(textEntity.getTextAfter())
|
||||
.section(textEntity.getManualOverwrite().getSection()
|
||||
.orElse(textEntity.getDeepestFullyContainingNode().toString()))
|
||||
.positions(positions)
|
||||
.build());
|
||||
UnprocessedManualEntity.UnprocessedManualEntityBuilder builder = UnprocessedManualEntity.builder()
|
||||
.annotationId(allAnnotationIds.stream()
|
||||
.filter(entity::matchesAnnotationId)
|
||||
.findFirst()
|
||||
.orElse(""))
|
||||
.section(entity.getManualOverwrite().getSection()
|
||||
.orElse(entity.getDeepestFullyContainingNode().toString()))
|
||||
.positions(positions);
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
builder.textBefore(textEntity.getTextBefore()).textAfter(textEntity.getTextAfter());
|
||||
}
|
||||
unprocessedManualEntities.add(builder.build());
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@ -23,8 +23,11 @@ import java.util.stream.Stream;
|
||||
import org.kie.api.runtime.KieSession;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.ComponentFormat;
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
||||
@ -98,6 +101,22 @@ public class ComponentCreationService {
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(new LinkedList<>(references))
|
||||
.componentFormat(ComponentFormat.TEXT)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
public void create(String ruleIdentifier, String name, String value, String valueDescription, Collection<Entity> references, ComponentFormat componentFormat) {
|
||||
|
||||
referencedEntities.addAll(references);
|
||||
|
||||
kieSession.insert(Component.builder()
|
||||
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||
.name(name)
|
||||
.value(value)
|
||||
.valueDescription(valueDescription)
|
||||
.references(new LinkedList<>(references))
|
||||
.componentFormat(componentFormat)
|
||||
.build());
|
||||
}
|
||||
|
||||
@ -376,6 +395,27 @@ public class ComponentCreationService {
|
||||
}
|
||||
|
||||
|
||||
public void createComponentForTables(String ruleIdentifier, Collection<Entity> entities) {
|
||||
|
||||
entities.stream()
|
||||
.filter(entity -> !referencedEntities.contains(entity))
|
||||
.sorted(EntityComparators.first())
|
||||
.forEach(entity -> {
|
||||
String value = entity.getValue();
|
||||
ComponentFormat componentFormat = ComponentFormat.TEXT;
|
||||
SemanticNode containingNode = entity.getContainingNode();
|
||||
|
||||
if (containingNode instanceof TableCell cell) { // case for tables with 0 rows and 0 columns
|
||||
value = cell.getTextBlock().getSearchText();
|
||||
} else if (containingNode instanceof Table table) {
|
||||
value = table.asCsv();
|
||||
componentFormat = ComponentFormat.CSV;
|
||||
}
|
||||
create(ruleIdentifier, entity.getType(), value, "Table Entity", List.of(entity), componentFormat);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is.
|
||||
*
|
||||
|
||||
@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
@ -907,6 +909,34 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a table entity based on the document table.
|
||||
*
|
||||
* @param table The table to base the table entity on.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @return An optional containing the created {@link TableEntity}.
|
||||
*/
|
||||
public Optional<TableEntity> bySemanticNode(Table table, String type, EntityType entityType) {
|
||||
|
||||
TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType);
|
||||
|
||||
Optional<TableEntity> optionalTableEntity = table.getDocumentTree().getRoot().getNode().getEntities()
|
||||
.stream()
|
||||
.filter(e -> e instanceof TableEntity && e.equals(tableEntity) && e.type().equals(type))
|
||||
.map(e -> (TableEntity) e)
|
||||
.findAny();
|
||||
if (optionalTableEntity.isPresent()) {
|
||||
return optionalTableEntity;
|
||||
}
|
||||
|
||||
addListenerToEntity(tableEntity);
|
||||
table.getDocumentTree().addEntityToGraph(tableEntity);
|
||||
|
||||
return Optional.of(tableEntity);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands a text entity's start boundary based on a regex pattern match.
|
||||
*
|
||||
@ -978,7 +1008,8 @@ public class EntityCreationService {
|
||||
if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) {
|
||||
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.equals(entity) && e.type().equals(type))
|
||||
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
|
||||
.map(e -> (TextEntity) e)
|
||||
.peek(e -> e.addEngines(engines))
|
||||
.findAny();
|
||||
if (optionalTextEntity.isEmpty()) {
|
||||
@ -1419,7 +1450,7 @@ public class EntityCreationService {
|
||||
.filter(e -> e.equals(entity))//
|
||||
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
|
||||
.findAny()
|
||||
.ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node));
|
||||
.ifPresent(e -> addDuplicateEntityToGraph((TextEntity) e, entity.getTextRange(), node));
|
||||
|
||||
} else {
|
||||
addListenerToEntity(entity);
|
||||
@ -1469,19 +1500,17 @@ public class EntityCreationService {
|
||||
}
|
||||
additionalIntersectingNode.getEntities().add(entityToDuplicate);
|
||||
additionalIntersectingNode.getPages(newTextRange)
|
||||
.forEach(page -> page.getEntities().add(entityToDuplicate));
|
||||
.forEach(page -> page.getSemanticEntities().add(entityToDuplicate));
|
||||
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private void addListenerToEntity(TextEntity textEntity) {
|
||||
private void addListenerToEntity(IEntity textEntity) {
|
||||
|
||||
if(kieSessionUpdater != null) {
|
||||
if (kieSessionUpdater != null) {
|
||||
textEntity.addEntityEventListener(kieSessionUpdater);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -162,7 +162,7 @@ public class EntityFromPrecursorCreationService {
|
||||
correctEntity.getIntersectingNodes()
|
||||
.forEach(n -> n.getEntities().add(correctEntity));
|
||||
correctEntity.getPages()
|
||||
.forEach(page -> page.getEntities().add(correctEntity));
|
||||
.forEach(page -> page.getSemanticEntities().add(correctEntity));
|
||||
|
||||
correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList());
|
||||
correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry());
|
||||
|
||||
@ -28,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
|
||||
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
@ -128,9 +129,9 @@ public class EntityDroolsExecutionService {
|
||||
}
|
||||
});
|
||||
|
||||
for (TextEntity textEntity : document.getEntities()) {
|
||||
textEntity.addEntityEventListener(kieSessionUpdater);
|
||||
textEntity.notifyEntityInserted();
|
||||
for (SemanticEntity semanticEntity : document.getEntities()) {
|
||||
semanticEntity.addEntityEventListener(kieSessionUpdater);
|
||||
semanticEntity.notifyEntityInserted();
|
||||
}
|
||||
|
||||
document.getPages()
|
||||
|
||||
@ -9,6 +9,8 @@ import org.kie.api.runtime.rule.FactHandle;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
@ -50,14 +52,14 @@ public class KieSessionUpdater implements EntityEventListener {
|
||||
|
||||
private void handleOnEntityEvent(IEntity entity, Consumer<Object> consumer) {
|
||||
|
||||
if (entity instanceof TextEntity textEntity) {
|
||||
updateIntersectingNodes(textEntity);
|
||||
textEntity.getRelations().values()
|
||||
if (entity instanceof SemanticEntity semanticEntity) {
|
||||
updateIntersectingNodes(semanticEntity);
|
||||
semanticEntity.getRelations().values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.forEach(consumer);
|
||||
textEntity.getRelations().keySet()
|
||||
.forEach(k -> k.getRelations().getOrDefault(textEntity, Collections.emptySet())
|
||||
semanticEntity.getRelations().keySet()
|
||||
.forEach(k -> k.getRelations().getOrDefault(semanticEntity, Collections.emptySet())
|
||||
.forEach(consumer));
|
||||
}
|
||||
|
||||
@ -71,9 +73,9 @@ public class KieSessionUpdater implements EntityEventListener {
|
||||
}
|
||||
|
||||
|
||||
private void updateIntersectingNodes(TextEntity textEntity) {
|
||||
private void updateIntersectingNodes(SemanticEntity semanticEntity) {
|
||||
|
||||
textEntity.getIntersectingNodes()
|
||||
semanticEntity.getIntersectingNodes()
|
||||
.forEach(this::updateFactIfPresent);
|
||||
}
|
||||
|
||||
|
||||
@ -37,6 +37,8 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
|
||||
public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
@ -262,6 +264,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testDoseMortalityExtraction() {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
|
||||
@ -269,9 +272,18 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||
System.out.println("Start Full integration test");
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||
System.out.println("Finished structure analysis");
|
||||
analyzeService.analyze(request);
|
||||
AnalyzeResult analyze = analyzeService.analyze(request);
|
||||
System.out.println("Finished analysis");
|
||||
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
var doseMortality = componentLog.getComponentLogEntries()
|
||||
.stream()
|
||||
|
||||
@ -0,0 +1,90 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
|
||||
class TableComponentsIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||
|
||||
private static final String RULES = loadFromClassPath("drools/documine_flora_table_test.drl");
|
||||
private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_table_test_components.drl");
|
||||
private static final String DATE_FORMATS = loadFromClassPath("dateFormats.txt");
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void stubClients() {
|
||||
|
||||
TenantContext.setTenantId("redaction");
|
||||
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
|
||||
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(System.currentTimeMillis());
|
||||
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(COMPONENT_RULES));
|
||||
when(dateFormatsClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis());
|
||||
when(dateFormatsClient.getDateFormats(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(DATE_FORMATS));
|
||||
|
||||
loadDictionaryForTest();
|
||||
loadTypeForTest();
|
||||
loadNerForTest();
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, null, true)).thenReturn(getTemplateDictionaryTypeResponse());
|
||||
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, null, true)).thenReturn(getDossierDictionaryTypeResponse());
|
||||
mockDictionaryCalls(null);
|
||||
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testTableComponentsCreation() throws IOException {
|
||||
|
||||
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
|
||||
|
||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||
|
||||
analyzeService.analyze(request);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/TableComponents.pdf";
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
|
||||
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
boolean tableComponentFound = componentLog.getComponentLogEntries()
|
||||
.stream()
|
||||
.anyMatch(entry -> "Table".equals(entry.getName()));
|
||||
|
||||
assertTrue(tableComponentFound, "Expected table component 'Table' to be present in the component log");
|
||||
|
||||
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
boolean tableEntityFound = entityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.anyMatch(entry -> entry.getMatchedRule() != null && entry.getMatchedRule().contains("T.0.0"));
|
||||
|
||||
assertTrue(tableEntityFound, "Expected table entity creation ('T.0.0') to be present in the entity log");
|
||||
}
|
||||
|
||||
}
|
||||
@ -348,11 +348,11 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
||||
.orElseThrow();
|
||||
|
||||
assertEquals(textEntity.getValue(), searchTerm);
|
||||
assertTrue(pageNode.getEntities().contains(textEntity));
|
||||
assertTrue(pageNode.getSemanticEntities().contains(textEntity));
|
||||
assertTrue(document.getPages()
|
||||
.stream()
|
||||
.filter(page -> page != pageNode)
|
||||
.noneMatch(page -> page.getEntities().contains(textEntity)));
|
||||
.noneMatch(page -> page.getSemanticEntities().contains(textEntity)));
|
||||
assertTrue(textEntity.getPages().contains(pageNode));
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||
assertTrue(textEntity.getIntersectingNodes()
|
||||
|
||||
@ -30,6 +30,7 @@ import com.iqser.red.service.redaction.v1.server.logger.Context;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
@ -200,14 +201,14 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
||||
System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns);
|
||||
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
|
||||
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
|
||||
for (TextEntity entity : document.getEntities()) {
|
||||
for (TextEntity entity : document.getTextEntities()) {
|
||||
var foundEntity = foundEntities.stream()
|
||||
.filter(f -> f.getId().equals(entity.getId()))
|
||||
.findFirst()
|
||||
.get();
|
||||
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
|
||||
}
|
||||
assert document.getEntities()
|
||||
assert document.getTextEntities()
|
||||
.stream()
|
||||
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
|
||||
assert foundEntities.stream()
|
||||
@ -225,10 +226,10 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
||||
try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) {
|
||||
|
||||
for (Page page : document.getPages()) {
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(TextEntity::applied)
|
||||
.filter(SemanticEntity::applied)
|
||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
|
||||
.stream())
|
||||
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
||||
@ -241,7 +242,7 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
||||
}
|
||||
|
||||
for (Page page : document.getPages()) {
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
||||
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
|
||||
.stream()
|
||||
.filter(entityNode -> !entityNode.removed())
|
||||
.filter(entityNode -> !entityNode.applied())
|
||||
|
||||
@ -83,7 +83,7 @@ public class TableTest extends BuildDocumentIntegrationTest {
|
||||
file);
|
||||
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
||||
|
||||
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA);
|
||||
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getTextEntities(), Color.MAGENTA);
|
||||
|
||||
viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup));
|
||||
}
|
||||
|
||||
@ -22,7 +22,7 @@ public class Cbi11Test extends RulesIntegrationTest {
|
||||
|
||||
doAnalysis(document, Collections.emptyList());
|
||||
|
||||
List<String> authorNames = document.getEntities()
|
||||
List<String> authorNames = document.getTextEntities()
|
||||
.stream()
|
||||
.map(Dictionary::splitIntoAuthorNames)
|
||||
.flatMap(Collection::stream)
|
||||
|
||||
@ -9,6 +9,7 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
|
||||
@ -46,9 +47,9 @@ public class EntityVisualizationUtility {
|
||||
|
||||
private static List<ColoredRectangle> getEntityRectangles(Color color, Page page) {
|
||||
|
||||
return page.getEntities()
|
||||
return page.getSemanticEntities()
|
||||
.stream()
|
||||
.map(TextEntity::getPositionsOnPagePerPage)
|
||||
.map(SemanticEntity::getPositionsOnPagePerPage)
|
||||
.flatMap(Collection::stream)
|
||||
.filter(p -> p.getPage().equals(page))
|
||||
.map(PositionOnPage::getRectanglePerLine)
|
||||
|
||||
@ -460,7 +460,6 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
|
||||
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
|
||||
end
|
||||
|
||||
|
||||
rule "DefaultComponents.999.0: Create components for all unmapped entities."
|
||||
salience -999
|
||||
when
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,492 @@
|
||||
package drools
|
||||
|
||||
import static java.lang.String.format;
|
||||
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.Collection;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService;
|
||||
import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
|
||||
global ComponentCreationService componentCreationService
|
||||
global ComponentMappingService componentMappingService
|
||||
global RulesLogger logger
|
||||
|
||||
|
||||
//------------------------------------ queries ------------------------------------
|
||||
|
||||
query "getFileAttributes"
|
||||
$fileAttribute: FileAttribute()
|
||||
end
|
||||
|
||||
query "getComponents"
|
||||
$component: Component()
|
||||
end
|
||||
|
||||
//------------------------------------ table rules ------------------------------------
|
||||
|
||||
rule "TableComponents.0.0: Create components for all table entities."
|
||||
when
|
||||
$tables: List() from collect (Entity(type == "Table"))
|
||||
then
|
||||
componentCreationService.createComponentForTables("TableComponents.0.0", $tables);
|
||||
end
|
||||
|
||||
//------------------------------------ Default Components rules ------------------------------------
|
||||
|
||||
rule "StudyTitle.0.0: First Title found"
|
||||
when
|
||||
$titleCandidates: List() from collect (Entity(type == "title"))
|
||||
then
|
||||
componentCreationService.firstOrElse("StudyTitle.0.0", "Study_Title", $titleCandidates, "");
|
||||
end
|
||||
|
||||
|
||||
rule "PerformingLaboratory.1.0: Performing Laboratory name and country found in same section"
|
||||
when
|
||||
$laboratoryName: Entity(type == "laboratory_name", $node: containingNode)
|
||||
$laboratoryCountry: Entity(type == "laboratory_country", containingNode == $node)
|
||||
not Entity(type == "laboratory_country", containingNode == $node, Math.abs($laboratoryName.startOffset - startOffset) < Math.abs($laboratoryName.startOffset - $laboratoryCountry.startOffset))
|
||||
then
|
||||
componentCreationService.create("PerformingLaboratory.1.0", "Performing_Laboratory", $laboratoryName.getValue() + ", " + $laboratoryCountry.getValue(), "Laboratory name and country found!", List.of($laboratoryName, $laboratoryCountry));
|
||||
end
|
||||
|
||||
rule "PerformingLaboratory.2.0: Performing Laboratory name but no country found in same section"
|
||||
when
|
||||
$laboratoryName: Entity(type == "laboratory_name", $node: containingNode)
|
||||
not Entity(type == "laboratory_country", containingNode == $node)
|
||||
then
|
||||
componentCreationService.create("PerformingLaboratory.2.0", "Performing_Laboratory", $laboratoryName.getValue(), "Only laboratory name found!", List.of($laboratoryName));
|
||||
end
|
||||
|
||||
rule "PerformingLaboratory.0.2: Performing Laboratory not found"
|
||||
salience -1
|
||||
when
|
||||
not Component(name == "Performing_Laboratory")
|
||||
then
|
||||
componentCreationService.create("PerformingLaboratory.0.2", "Performing_Laboratory", "", "fallback");
|
||||
end
|
||||
|
||||
|
||||
rule "ReportNumber.0.0: First Report number found"
|
||||
when
|
||||
$reportNumberCandidates: List() from collect (Entity(type == "report_number"))
|
||||
then
|
||||
componentCreationService.firstOrElse("ReportNumber.0.0", "Report_Number", $reportNumberCandidates, "");
|
||||
end
|
||||
|
||||
|
||||
rule "GLPStudy.0.0: GLP Study found"
|
||||
when
|
||||
$glpStudyList: List(!isEmpty) from collect(Entity(type == "glp_study"))
|
||||
then
|
||||
componentCreationService.create("GLPStudy.0.0", "GLP_Study", "Yes", "Yes if present, No if not", $glpStudyList);
|
||||
end
|
||||
|
||||
rule "GLPStudy.1.0: GLP Study not found"
|
||||
when
|
||||
not Entity(type == "glp_study")
|
||||
then
|
||||
componentCreationService.create("GLPStudy.1.0", "GLP_Study", "No", "Yes if present, No if not");
|
||||
end
|
||||
|
||||
// Maps an OECD guideline number + year pair onto a guideline description via the
// "GuidelineMapping" table; fires before the fallback rules (salience 1).
rule "TestGuideline.0.1: match OECD number and year with guideline mappings"
salience 1
when
    not Component(name == "Test_Guidelines_1")
    $guidelineNumber: Entity(type == "oecd_guideline_number", $number: value)
    $guidelineYear: Entity(type == "oecd_guideline_year", $year: value)
then
    Optional<String> guidelineMatch = componentMappingService.from("GuidelineMapping").where("number = " + $number).where("year = " + $year).select("description").findAny();
    if (guidelineMatch.isEmpty()) {
        return;
    }
    // Fixed rule id: was "TestGuideline.0.0", which does not match this rule's name
    // and would mislabel the component's provenance in the logs.
    componentCreationService.create(
        "TestGuideline.0.1",
        "Test_Guidelines_1",
        guidelineMatch.get(),
        "OECD Number and guideline year mapped!",
        List.of($guidelineNumber, $guidelineYear)
    );
end
|
||||
|
||||
// Fallback: no number/year mapping produced a component, so use the raw
// "oecd_guideline" entity value directly.
rule "TestGuideline.1.0: no guideline mapping found"
when
    not Component(name == "Test_Guidelines_1")
    $guideLine: Entity(type == "oecd_guideline")
then
    // Fixed rule id: was "TestGuideline.2.0", which collided with the id emitted
    // by rule "TestGuideline.2.0" (EPA/EC guidelines) and corrupted provenance.
    componentCreationService.create("TestGuideline.1.0", "Test_Guidelines_1", $guideLine.getValue(), "No Mapping for OECD number and year found, using fallback instead!", List.of($guideLine));
end
|
||||
|
||||
rule "TestGuideline.2.0: All values of EPA guideline and EC guidelines"
|
||||
when
|
||||
$guidelines: List() from collect (Entity(type == "epa_guideline" || type == "ec_guideline"))
|
||||
then
|
||||
componentCreationService.joining("TestGuideline.2.0", "Test_Guidelines_2", $guidelines);
|
||||
end
|
||||
|
||||
|
||||
rule "StartDate.0.0: All experimental start dates converted to dd/MM/yyyy"
|
||||
when
|
||||
$startDates: List() from collect (Entity(type == "experimental_start_date"))
|
||||
then
|
||||
componentCreationService.convertDates("StartDate.0.0", "Experimental_Starting_Date", $startDates);
|
||||
end
|
||||
|
||||
|
||||
rule "CompletionDate.0.0: All experimental end dates converted to dd/MM/yyyy"
|
||||
when
|
||||
$endDates: List() from collect (Entity(type == "experimental_end_date"))
|
||||
then
|
||||
componentCreationService.convertDates("CompletionDate.0.0", "Experimental_Completion_Date", $endDates);
|
||||
end
|
||||
|
||||
|
||||
rule "AnalysisCertificate.0.0: Unique values of certificate of analysis batch identification"
|
||||
when
|
||||
$batchNumbers: List() from collect (Entity(type == "batch_number"))
|
||||
then
|
||||
componentCreationService.joiningUnique("AnalysisCertificate.0.0", "Certificate_of_Analysis_Batch_Identification", $batchNumbers);
|
||||
end
|
||||
|
||||
rule "StudyConclusion.0.0: Study conclusion in first found section"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$studyConclusions: List() from collect(Entity(type == "study_conclusion"))
|
||||
then
|
||||
componentCreationService.joiningFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $studyConclusions, " ");
|
||||
end
|
||||
|
||||
rule "GuidelineDeviation.0.0: Guideline deviation as sentences"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$guidelineDeviations: List() from collect (Entity(type == "guideline_deviation"))
|
||||
then
|
||||
componentCreationService.joining("GuidelineDeviation.0.0", "Deviation_from_the_Guideline", $guidelineDeviations, "\n");
|
||||
end
|
||||
|
||||
rule "Species.0.0: First found species"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$species: List() from collect (Entity(type == "species"))
|
||||
then
|
||||
componentCreationService.firstOrElse("Species.0.0", "Species", $species, "");
|
||||
end
|
||||
|
||||
rule "Strain.0.0: First found strain"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$strain: List() from collect (Entity(type == "strain"))
|
||||
then
|
||||
componentCreationService.firstOrElse("Strain.0.0", "Strain", $strain, "");
|
||||
end
|
||||
|
||||
rule "Conclusion.0.0: Unique values of Conclusion LD50"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "425", "436")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$conclusions: List() from collect (Entity(type == "ld50_value"))
|
||||
then
|
||||
componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $conclusions);
|
||||
end
|
||||
|
||||
// Sets "Greater than" when any ld50_greater entity exists for the relevant
// OECD studies; paired with Conclusion.1.1 (not-found branch).
// Fixed rule name: was "Conclusion0.1.0" (misplaced dot), inconsistent with the
// "Conclusion.1.0" id used in the create call and with the family's naming scheme.
rule "Conclusion.1.0: Greater than found"
when
    $oecdNumber: String() from List.of("402", "403", "425", "436")
    FileAttribute(label == "OECD Number", value == $oecdNumber)
    $conclusions: List(!isEmpty()) from collect (Entity(type == "ld50_greater"))
then
    componentCreationService.create("Conclusion.1.0", "Conclusion_LD50_Greater_than", "Greater than", "Entity of type 'ld50_greater' found", $conclusions);
end
|
||||
|
||||
rule "Conclusion.1.1: Greater than not found"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "425", "436")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
not Entity(type == "ld50_greater")
|
||||
then
|
||||
componentCreationService.create("Conclusion.1.1", "Conclusion_LD50_Greater_than", "", "No entity of type 'ld50_greater' found");
|
||||
end
|
||||
|
||||
rule "Conclusion.2.0: Minimum confidence as unique values"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "425", "436")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$conclusions: List() from collect (Entity(type == "confidence_minimal"))
|
||||
then
|
||||
componentCreationService.joiningUnique("Conclusion.2.0", "Conclusion_Minimum_Confidence", $conclusions);
|
||||
end
|
||||
|
||||
rule "Conclusion.3.0: Maximum confidence as unique values"
|
||||
when
|
||||
$oecdNumber: String() from List.of("402", "403", "425", "436")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$conclusions: List() from collect (Entity(type == "confidence_maximal"))
|
||||
then
|
||||
componentCreationService.joiningUnique("Conclusion.3.0", "Conclusion_Maximum_Confidence", $conclusions);
|
||||
end
|
||||
|
||||
rule "Necropsy.0.0: Necropsy findings from longest section"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$necropsies: List() from collect (Entity(type == "necropsy_findings"))
|
||||
then
|
||||
componentCreationService.joiningFromLongestSectionOnly("Necropsy.0.0", "Necropsy_Findings", $necropsies, " ");
|
||||
end
|
||||
|
||||
// For OECD 403/436 studies, necropsy findings are joined line-by-line instead of
// taking only the longest section (see Necropsy.0.0 for OECD 402).
rule "Necropsy.0.1: Necropsy findings joined with \n"
when
    FileAttribute(label == "OECD Number", value == "403" || value == "436")
    $necropsies: List() from collect (Entity(type == "necropsy_findings"))
then
    // Fixed rule id: was "Necropsy.0.0", colliding with the OECD-402 rule's id.
    componentCreationService.joining("Necropsy.0.1", "Necropsy_Findings", $necropsies, "\n");
end
|
||||
|
||||
rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$dosages: List() from collect (Entity(type == "doses_(mg_kg_bw)"))
|
||||
then
|
||||
componentCreationService.joining("Necropsy.1.0", "Doses_mg_per_kg_bw", $dosages, " ");
|
||||
end
|
||||
|
||||
// Joins all "4h_exposure" entities into one block for OECD 403/436 studies.
rule "Necropsy.2.0: Conducted with 4 hours of exposure as one block"
when
    $oecdNumber: String() from List.of("403", "436")
    FileAttribute(label == "OECD Number", value == $oecdNumber)
    $exposures: List() from collect (Entity(type == "4h_exposure"))
then
    // Fixed rule id: was "Necropsy.3.0", which does not exist as a rule and
    // mislabeled the component's provenance in the logs.
    componentCreationService.joining("Necropsy.2.0", "Conducted_with_4_Hours_of_Exposure", $exposures, " ");
end
|
||||
|
||||
rule "StudyDesign.0.0: Study design as one block"
|
||||
when
|
||||
$oecdNumber: String() from List.of("404", "405", "406", "428", "429", "438", "439", "474", "487")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$studyDesigns: List() from collect (Entity(type == "study_design"))
|
||||
then
|
||||
componentCreationService.joining("StudyDesign.0.0", "Study_Design", $studyDesigns, " ");
|
||||
end
|
||||
|
||||
rule "Results.0.0: Results and conclusions as joined values"
|
||||
when
|
||||
$oecdNumber: String() from List.of("406", "428", "438", "439", "474", "487")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$results: List() from collect (Entity(type == "results_and_conclusion"))
|
||||
then
|
||||
componentCreationService.joining("Results.0.0", "Results_and_Conclusions", $results, " ");
|
||||
end
|
||||
|
||||
rule "WeightBehavior.0.0: Weight change behavior as sentences"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$weightChanges: List() from collect (Entity(type == "weight_behavior_changes"))
|
||||
then
|
||||
componentCreationService.joining("WeightBehavior.0.0", "Weight_Behavior_Changes", $weightChanges, "\n");
|
||||
end
|
||||
|
||||
rule "MortalityStatement.0.0: Mortality statements as one block"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "402")
|
||||
$mortalityStatements: List() from collect (Entity(type == "mortality_statement"))
|
||||
then
|
||||
componentCreationService.joining("MortalityStatement.0.0", "Mortality_Statement", $mortalityStatements, " ");
|
||||
end
|
||||
|
||||
// Joins all clinical observations as newline-separated sentences for OECD 403.
rule "ClinicalObservations.0.0: Clinical observations as sentences"
when
    FileAttribute(label == "OECD Number", value == "403")
    $observations: List() from collect (Entity(type == "clinical_observations"))
then
    // Fixed rule id: was "MortalityStatement.0.0" (copy-paste from the mortality
    // rule), which made clinical-observation components appear to originate from
    // the mortality rule in the logs.
    componentCreationService.joining("ClinicalObservations.0.0", "Clinical_Observations", $observations, "\n");
end
|
||||
|
||||
rule "BodyWeight.0.0: Bodyweight changes as sentences"
|
||||
when
|
||||
FileAttribute(label == "OECD Number", value == "403")
|
||||
$weightChanges: List() from collect (Entity(type == "bodyweight_changes"))
|
||||
then
|
||||
componentCreationService.joining("BodyWeight.0.0", "Body_Weight_Changes", $weightChanges, "\n");
|
||||
end
|
||||
|
||||
rule "Detailing.0.0: Detailing of reported changes as one block"
|
||||
when
|
||||
$oecdNumber: String() from List.of("404", "405")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$detailings: List() from collect (Entity(type == "detailing"))
|
||||
then
|
||||
componentCreationService.joining("Detailing.0.0", "Detailing_of_Reported_Changes", $detailings, " ");
|
||||
end
|
||||
|
||||
rule "Sex.0.0: Male sex found"
|
||||
when
|
||||
$oecdNumber: String() from List.of("405", "429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$males: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "male" || value.toLowerCase() == "males")))
|
||||
then
|
||||
componentCreationService.create("Sex.0.0", "Sex", "male", "male sex found", $males);
|
||||
end
|
||||
|
||||
// Female branch of the Sex component; mirrors Sex.0.0 (male branch).
rule "Sex.1.0: Female sex found"
when
    $oecdNumber: String() from List.of("405", "429")
    FileAttribute(label == "OECD Number", value == $oecdNumber)
    $females: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "female" || value.toLowerCase() == "females")))
then
    // Fixed rule id: was "Sex.0.0", colliding with the male branch's id and making
    // the two Sex components indistinguishable by provenance.
    componentCreationService.create("Sex.1.0", "Sex", "female", "female sex found", $females);
end
|
||||
|
||||
rule "NumberOfAnimals.0.0: Number of animals found"
|
||||
when
|
||||
$oecdNumber: String() from List.of("405", "429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$numberOfAnimals: Entity(type == "number_of_animals")
|
||||
then
|
||||
componentCreationService.create("NumberOfAnimals.0.0", "Number_of_Animals", $numberOfAnimals.getValue(), "Number of animals found directly", $numberOfAnimals);
|
||||
end
|
||||
|
||||
rule "NumberOfAnimals.1.0: Count unique occurences of animals"
|
||||
when
|
||||
$oecdNumber: String() from List.of("405", "429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
not Entity(type == "number_of_animals")
|
||||
$animals: List() from collect (Entity(type == "animal_number"))
|
||||
then
|
||||
componentCreationService.uniqueValueCount("NumberOfAnimals.1.0", "Number_of_Animals", $animals);
|
||||
end
|
||||
|
||||
rule "ClinicalSigns.0.0: Clinical signs as sentences"
|
||||
when
|
||||
$oecdNumber: String() from List.of("425")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$clinicalSigns: List() from collect (Entity(type == "clinical_signs"))
|
||||
then
|
||||
componentCreationService.joining("ClinicalSigns.0.0", "Clinical_Signs", $clinicalSigns, "\n");
|
||||
end
|
||||
|
||||
rule "DoseMortality.0.0: Dose mortality joined with dose from same table row"
|
||||
when
|
||||
$oecdNumber: String() from List.of("425")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$doseMortalities: List() from collect (Entity(type == "dose_mortality" || type == "dose_mortality_dose"))
|
||||
then
|
||||
componentCreationService.joiningFromSameTableRow("DoseMortality.0.0", "Dose_Mortality", $doseMortalities);
|
||||
end
|
||||
|
||||
rule "Mortality.0.0: Mortality as one block"
|
||||
when
|
||||
$oecdNumber: String() from List.of("425")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$mortalities: List() from collect (Entity(type == "mortality"))
|
||||
then
|
||||
componentCreationService.joining("Mortality.0.0", "Mortality", $mortalities, " ");
|
||||
end
|
||||
|
||||
rule "Dosages.0.0: First found value of Dosages"
|
||||
when
|
||||
$oecdNumber: String() from List.of("425")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$mortalities: List() from collect (Entity(type == "dosages"))
|
||||
then
|
||||
componentCreationService.firstOrElse("Dosages.0.0", "Dosages", $mortalities, "");
|
||||
end
|
||||
|
||||
rule "PrelimResults.0.0: Preliminary test results as sentences"
|
||||
when
|
||||
$oecdNumber: String() from List.of("429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$results: List() from collect (Entity(type == "preliminary_test_results"))
|
||||
then
|
||||
componentCreationService.joining("PrelimResults.0.0", "Preliminary_Test_Results", $results, "\n");
|
||||
end
|
||||
|
||||
rule "TestResults.0.0: Test results as one block"
|
||||
when
|
||||
$oecdNumber: String() from List.of("429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$results: List() from collect (Entity(type == "test_results"))
|
||||
then
|
||||
componentCreationService.joining("TestResults.0.0", "Test_Results", $results, " ");
|
||||
end
|
||||
|
||||
rule "PositiveControl.0.0: Was the definitive study conducted with positive control"
|
||||
when
|
||||
$oecdNumber: String() from List.of("429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$results: List() from collect (Entity(type == "positive_control"))
|
||||
then
|
||||
componentCreationService.joining("PositiveControl.0.0", "Was_the_definitive_study_conducted_with_positive_control", $results, " ");
|
||||
end
|
||||
|
||||
rule "MainResults.0.0: Results from main study as one block"
|
||||
when
|
||||
$oecdNumber: String() from List.of("429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$results: List() from collect (Entity(type == "results_(main_study)"))
|
||||
then
|
||||
componentCreationService.joining("MainResults.0.0", "Results_Main_Study", $results, " ");
|
||||
end
|
||||
|
||||
rule "UsedApproach.0.0: Used approach found and mapped to 'Group'"
|
||||
when
|
||||
$oecdNumber: String() from List.of("429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
$results: List(!isEmpty()) from collect (Entity(type == "approach_used"))
|
||||
then
|
||||
componentCreationService.create("UsedApproach.0.0", "What_was_the_approach_used", "Group", "'Group' when approach used is present, else 'Individual'", $results);
|
||||
end
|
||||
|
||||
rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
|
||||
when
|
||||
$oecdNumber: String() from List.of("429")
|
||||
FileAttribute(label == "OECD Number", value == $oecdNumber)
|
||||
not Entity(type == "approach_used")
|
||||
then
|
||||
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
|
||||
end
|
||||
|
||||
rule "DefaultComponents.999.0: Create components for all unmapped entities."
|
||||
salience -999
|
||||
when
|
||||
not FileAttribute(label == "OECD Number")
|
||||
$allEntities: List(!isEmpty()) from collect (Entity())
|
||||
then
|
||||
componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $allEntities);
|
||||
end
|
||||
|
||||
|
||||
//------------------------------------ Component merging rules ------------------------------------
|
||||
/*
|
||||
rule "X.0.0: merge duplicate component references"
|
||||
when
|
||||
$first: Component()
|
||||
$duplicate: Component(this != $first, name == $first.name, value == $first.value)
|
||||
then
|
||||
$first.getReferences().addAll($duplicate.getReferences());
|
||||
retract($duplicate);
|
||||
end
|
||||
*/
|
||||
@ -68,6 +68,15 @@ query "getFileAttributes"
|
||||
$fileAttribute: FileAttribute()
|
||||
end
|
||||
|
||||
//------------------------------------ T rules ------------------------------------
|
||||
rule "T.0.0: Create TableEntities for all Tables"
|
||||
when
|
||||
$table: Table()
|
||||
then
|
||||
Optional<TableEntity> tableEntity = entityCreationService.bySemanticNode($table, "Table", EntityType.ENTITY);
|
||||
tableEntity.ifPresent(t -> t.apply("T.0.0", "Table found.", "n-a"));
|
||||
end
|
||||
|
||||
//------------------------------------ H rules ------------------------------------
|
||||
|
||||
// Rule unit: H.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user