Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a9fff497b5 | ||
|
|
c3e0aae800 | ||
|
|
db7debf0d4 | ||
|
|
229d1e98ac | ||
|
|
723263a7c4 |
@ -10,6 +10,7 @@ import java.util.Optional;
|
|||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
|
||||||
@ -362,12 +363,15 @@ public class DocumentTree {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void addEntityToGraph(TextEntity entity) {
|
public void addEntityToGraph(SemanticEntity entity) {
|
||||||
|
|
||||||
getRoot().getNode().addThisToEntityIfIntersects(entity);
|
getRoot().getNode().addThisToEntityIfIntersects(entity);
|
||||||
|
|
||||||
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
|
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
|
||||||
EntityEnrichmentService.enrichEntity(entity, textBlock);
|
|
||||||
|
if (entity instanceof TextEntity textEntity) {
|
||||||
|
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
|
||||||
|
}
|
||||||
|
|
||||||
EntityCreationUtility.addToPages(entity);
|
EntityCreationUtility.addToPages(entity);
|
||||||
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
||||||
@ -378,6 +382,7 @@ public class DocumentTree {
|
|||||||
|
|
||||||
entity.computeRelations();
|
entity.computeRelations();
|
||||||
entity.notifyEntityInserted();
|
entity.notifyEntityInserted();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -188,7 +188,7 @@ public interface IEntity {
|
|||||||
*
|
*
|
||||||
* @return A set of references.
|
* @return A set of references.
|
||||||
*/
|
*/
|
||||||
default Set<TextEntity> references() {
|
default Set<SemanticEntity> references() {
|
||||||
|
|
||||||
return getMatchedRule().getReferences();
|
return getMatchedRule().getReferences();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -40,7 +40,7 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
|||||||
boolean ignored;
|
boolean ignored;
|
||||||
|
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
Set<TextEntity> references = Collections.emptySet();
|
Set<SemanticEntity> references = Collections.emptySet();
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -0,0 +1,130 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.collections4.map.HashedMap;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.experimental.FieldDefaults;
|
||||||
|
import lombok.experimental.SuperBuilder;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@FieldDefaults(level = AccessLevel.PROTECTED)
|
||||||
|
@SuperBuilder
|
||||||
|
public abstract class SemanticEntity implements IEntity {
|
||||||
|
|
||||||
|
final EntityType entityType;
|
||||||
|
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
||||||
|
@Builder.Default
|
||||||
|
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<Page> pages = new HashSet<>();
|
||||||
|
List<PositionOnPage> positionsOnPagePerPage;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
||||||
|
SemanticNode deepestFullyContainingNode;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Map<SemanticEntity, Set<Relation>> relations = new HashMap<>();
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true when this entity is of EntityType ENTITY or HINT
|
||||||
|
*/
|
||||||
|
public boolean validEntityType() {
|
||||||
|
|
||||||
|
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean valid() {
|
||||||
|
|
||||||
|
return active() && validEntityType();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean isType(String type) {
|
||||||
|
|
||||||
|
return type().equals(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean isAnyType(List<String> types) {
|
||||||
|
|
||||||
|
return types.contains(type());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean matchesAnnotationId(String manualRedactionId) {
|
||||||
|
|
||||||
|
return getPositionsOnPagePerPage().stream()
|
||||||
|
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void addIntersectingNode(SemanticNode containingNode) {
|
||||||
|
|
||||||
|
intersectingNodes.add(containingNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void addEntityEventListener(EntityEventListener listener) {
|
||||||
|
|
||||||
|
entityEventListeners.add(listener);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void removeEntityEventListener(EntityEventListener listener) {
|
||||||
|
|
||||||
|
entityEventListeners.remove(listener);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<EntityEventListener> getEntityEventListeners() {
|
||||||
|
|
||||||
|
return entityEventListeners;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public abstract void computeRelations();
|
||||||
|
|
||||||
|
|
||||||
|
public void removeFromGraph() {
|
||||||
|
|
||||||
|
remove("FINAL.0.0", "removed completely");
|
||||||
|
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||||
|
pages.forEach(page -> page.getSemanticEntities().remove(this));
|
||||||
|
intersectingNodes = new LinkedList<>();
|
||||||
|
relations.keySet()
|
||||||
|
.forEach(entity -> entity.getRelations().remove(this));
|
||||||
|
relations = new HashedMap<>();
|
||||||
|
deepestFullyContainingNode = null;
|
||||||
|
pages = new HashSet<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,102 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||||
|
|
||||||
|
import java.awt.geom.Rectangle2D;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.collections4.map.HashedMap;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.EqualsAndHashCode;
|
||||||
|
import lombok.experimental.FieldDefaults;
|
||||||
|
import lombok.experimental.SuperBuilder;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@SuperBuilder
|
||||||
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
|
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||||
|
public class TableEntity extends SemanticEntity {
|
||||||
|
|
||||||
|
@EqualsAndHashCode.Include
|
||||||
|
final String id;
|
||||||
|
|
||||||
|
Table table;
|
||||||
|
|
||||||
|
|
||||||
|
public static TableEntity initialEntityNode(Table table, String type, EntityType entityType) {
|
||||||
|
|
||||||
|
return TableEntity.builder()
|
||||||
|
.id(table.buildId(table.getTextRange(), type, entityType))
|
||||||
|
.type(type)
|
||||||
|
.entityType(entityType)
|
||||||
|
.manualOverwrite(new ManualChangeOverwrite(entityType))
|
||||||
|
.table(table)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getValue() {
|
||||||
|
|
||||||
|
return "Table:" + table.getHeadline();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TextRange getTextRange() {
|
||||||
|
|
||||||
|
return table.getTextBlock().getTextRange();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String type() {
|
||||||
|
|
||||||
|
return getManualOverwrite().getType()
|
||||||
|
.orElse(NodeType.TABLE.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||||
|
|
||||||
|
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||||
|
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = table.getTextBlock().getPositionsPerPage(table.getTextRange());
|
||||||
|
|
||||||
|
positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet()
|
||||||
|
.stream()
|
||||||
|
.map(entry -> new PositionOnPage(id, entry.getKey(), entry.getValue()))
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
return positionsOnPagePerPage;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String asCsv() {
|
||||||
|
|
||||||
|
return table.asCsv();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void computeRelations() {
|
||||||
|
// NO - OP
|
||||||
|
// can be implemented in the future
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -1,15 +1,11 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
package com.iqser.red.service.redaction.v1.server.model.document.entity;
|
||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.PriorityQueue;
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.collections4.map.HashedMap;
|
import org.apache.commons.collections4.map.HashedMap;
|
||||||
@ -19,26 +15,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
|
|
||||||
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.EqualsAndHashCode;
|
import lombok.EqualsAndHashCode;
|
||||||
import lombok.experimental.FieldDefaults;
|
import lombok.experimental.FieldDefaults;
|
||||||
|
import lombok.experimental.SuperBuilder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a text entity within a document, characterized by its text range, type, entity type,
|
* Represents a text entity within a document, characterized by its text range, type, entity type,
|
||||||
* and associated metadata like matched rules, pages, and engines.
|
* and associated metadata like matched rules, pages, and engines.
|
||||||
*/
|
*/
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@SuperBuilder
|
||||||
@AllArgsConstructor
|
|
||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
|
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||||
@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
|
@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
|
||||||
public class TextEntity implements IEntity {
|
public class TextEntity extends SemanticEntity {
|
||||||
|
|
||||||
// primary key
|
// primary key
|
||||||
@EqualsAndHashCode.Include
|
@EqualsAndHashCode.Include
|
||||||
@ -48,13 +42,6 @@ public class TextEntity implements IEntity {
|
|||||||
TextRange textRange;
|
TextRange textRange;
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
Set<TextRange> duplicateTextRanges = new HashSet<>();
|
Set<TextRange> duplicateTextRanges = new HashSet<>();
|
||||||
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
|
|
||||||
final EntityType entityType;
|
|
||||||
|
|
||||||
@Builder.Default
|
|
||||||
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
|
|
||||||
@Builder.Default
|
|
||||||
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
|
|
||||||
|
|
||||||
boolean dictionaryEntry;
|
boolean dictionaryEntry;
|
||||||
boolean dossierDictionaryEntry;
|
boolean dossierDictionaryEntry;
|
||||||
@ -66,24 +53,12 @@ public class TextEntity implements IEntity {
|
|||||||
String value;
|
String value;
|
||||||
String textBefore;
|
String textBefore;
|
||||||
String textAfter;
|
String textAfter;
|
||||||
@Builder.Default
|
|
||||||
Set<Page> pages = new HashSet<>();
|
|
||||||
List<PositionOnPage> positionsOnPagePerPage;
|
|
||||||
@Builder.Default
|
|
||||||
List<SemanticNode> intersectingNodes = new LinkedList<>();
|
|
||||||
SemanticNode deepestFullyContainingNode;
|
|
||||||
|
|
||||||
@Builder.Default
|
|
||||||
Map<TextEntity, Set<Relation>> relations = new HashMap<>();
|
|
||||||
|
|
||||||
@Builder.Default
|
|
||||||
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
|
|
||||||
|
|
||||||
|
|
||||||
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
|
||||||
|
|
||||||
return TextEntity.builder()
|
return TextEntity.builder()
|
||||||
.id(buildId(node, textRange, type, entityType))
|
.id(node.buildId(textRange, type, entityType))
|
||||||
.type(type)
|
.type(type)
|
||||||
.entityType(entityType)
|
.entityType(entityType)
|
||||||
.textRange(textRange)
|
.textRange(textRange)
|
||||||
@ -110,19 +85,6 @@ public class TextEntity implements IEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) {
|
|
||||||
|
|
||||||
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange);
|
|
||||||
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
|
|
||||||
rectanglesPerLinePerPage.values()
|
|
||||||
.stream()
|
|
||||||
.flatMap(Collection::stream)
|
|
||||||
.toList(),
|
|
||||||
type,
|
|
||||||
entityType.name());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void addTextRange(TextRange textRange) {
|
public void addTextRange(TextRange textRange) {
|
||||||
|
|
||||||
duplicateTextRanges.add(textRange);
|
duplicateTextRanges.add(textRange);
|
||||||
@ -143,44 +105,13 @@ public class TextEntity implements IEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean isType(String type) {
|
|
||||||
|
|
||||||
return type().equals(type);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public boolean isAnyType(List<String> types) {
|
|
||||||
|
|
||||||
return types.contains(type());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void addIntersectingNode(SemanticNode containingNode) {
|
|
||||||
|
|
||||||
intersectingNodes.add(containingNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public String getValueWithLineBreaks() {
|
public String getValueWithLineBreaks() {
|
||||||
|
|
||||||
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
|
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void removeFromGraph() {
|
@Override
|
||||||
|
|
||||||
remove("FINAL.0.0", "removed completely");
|
|
||||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
|
||||||
pages.forEach(page -> page.getEntities().remove(this));
|
|
||||||
intersectingNodes = new LinkedList<>();
|
|
||||||
relations.keySet()
|
|
||||||
.forEach(entity -> entity.getRelations().remove(this));
|
|
||||||
relations = new HashedMap<>();
|
|
||||||
deepestFullyContainingNode = null;
|
|
||||||
pages = new HashSet<>();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||||
|
|
||||||
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||||
@ -215,6 +146,7 @@ public class TextEntity implements IEntity {
|
|||||||
return textEntity.contains(this);
|
return textEntity.contains(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean contains(TextEntity textEntity) {
|
public boolean contains(TextEntity textEntity) {
|
||||||
|
|
||||||
if (this.textRange.contains(textEntity.getTextRange())) {
|
if (this.textRange.contains(textEntity.getTextRange())) {
|
||||||
@ -239,7 +171,6 @@ public class TextEntity implements IEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public boolean intersects(TextEntity textEntity) {
|
public boolean intersects(TextEntity textEntity) {
|
||||||
|
|
||||||
return this.textRange.intersects(textEntity.getTextRange()) //
|
return this.textRange.intersects(textEntity.getTextRange()) //
|
||||||
@ -277,14 +208,6 @@ public class TextEntity implements IEntity {
|
|||||||
notifyEntityUpdated();
|
notifyEntityUpdated();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean matchesAnnotationId(String manualRedactionId) {
|
|
||||||
|
|
||||||
return getPositionsOnPagePerPage().stream()
|
|
||||||
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
@ -316,21 +239,6 @@ public class TextEntity implements IEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return true when this entity is of EntityType ENTITY or HINT
|
|
||||||
*/
|
|
||||||
public boolean validEntityType() {
|
|
||||||
|
|
||||||
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public boolean valid() {
|
|
||||||
|
|
||||||
return active() && validEntityType();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String value() {
|
public String value() {
|
||||||
|
|
||||||
@ -339,41 +247,32 @@ public class TextEntity implements IEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addEntityEventListener(EntityEventListener listener) {
|
|
||||||
|
|
||||||
entityEventListeners.add(listener);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeEntityEventListener(EntityEventListener listener) {
|
|
||||||
|
|
||||||
entityEventListeners.remove(listener);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public void computeRelations() {
|
public void computeRelations() {
|
||||||
|
|
||||||
for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) {
|
this.getDeepestFullyContainingNode().getEntities()
|
||||||
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
|
.stream()
|
||||||
if (textEntity.getTextRange().equals(this.getTextRange())) {
|
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
|
||||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
|
.map(semanticEntity -> (TextEntity) semanticEntity)
|
||||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
|
.forEach(textEntity -> {
|
||||||
} else if (textEntity.containedBy(this)) {
|
|
||||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
|
if (textEntity.getTextRange().equals(this.getTextRange())) {
|
||||||
} else if (this.containedBy(textEntity)) {
|
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
|
||||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
|
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
|
||||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
} else if (textEntity.containedBy(this)) {
|
||||||
} else {
|
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||||
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
|
||||||
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
} else if (this.containedBy(textEntity)) {
|
||||||
}
|
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
|
||||||
|
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||||
|
} else {
|
||||||
|
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
|
||||||
|
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import java.util.Map;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||||
|
|
||||||
@ -38,7 +39,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
|
|||||||
DocumentTree documentTree;
|
DocumentTree documentTree;
|
||||||
|
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
Set<TextEntity> entities = new HashSet<>();
|
Set<SemanticEntity> entities = new HashSet<>();
|
||||||
|
|
||||||
Map<Page, Rectangle2D> bBoxCache;
|
Map<Page, Rectangle2D> bBoxCache;
|
||||||
|
|
||||||
@ -61,7 +62,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
|
|||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<Page, Rectangle2D> getBBox() {
|
public Map<Page, Rectangle2D>getBBox() {
|
||||||
|
|
||||||
if (bBoxCache == null) {
|
if (bBoxCache == null) {
|
||||||
bBoxCache = GenericSemanticNode.super.getBBox();
|
bBoxCache = GenericSemanticNode.super.getBBox();
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import java.util.List;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||||
@ -42,7 +43,7 @@ public class Page {
|
|||||||
Footer footer;
|
Footer footer;
|
||||||
|
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
Set<TextEntity> entities = new HashSet<>();
|
Set<SemanticEntity> semanticEntities = new HashSet<>();
|
||||||
|
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
Set<Image> images = new HashSet<>();
|
Set<Image> images = new HashSet<>();
|
||||||
|
|||||||
@ -1,6 +1,9 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||||
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import static java.lang.String.format;
|
|||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -17,12 +18,16 @@ import java.util.stream.Stream;
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
|
||||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||||
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
|
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
|
||||||
|
|
||||||
@ -74,7 +79,25 @@ public interface SemanticNode {
|
|||||||
*
|
*
|
||||||
* @return Set of all Entities associated with this Node
|
* @return Set of all Entities associated with this Node
|
||||||
*/
|
*/
|
||||||
Set<TextEntity> getEntities();
|
Set<SemanticEntity> getEntities();
|
||||||
|
|
||||||
|
|
||||||
|
default Set<TextEntity> getTextEntities() {
|
||||||
|
|
||||||
|
return getEntities().stream()
|
||||||
|
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
|
||||||
|
.map(semanticEntity -> (TextEntity) semanticEntity)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
default Set<TableEntity> getTableEntities() {
|
||||||
|
|
||||||
|
return getEntities().stream()
|
||||||
|
.filter(semanticEntity -> semanticEntity instanceof TableEntity)
|
||||||
|
.map(semanticEntity -> (TableEntity) semanticEntity)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -85,9 +108,9 @@ public interface SemanticNode {
|
|||||||
*/
|
*/
|
||||||
default Stream<TextEntity> streamValidEntities() {
|
default Stream<TextEntity> streamValidEntities() {
|
||||||
|
|
||||||
return getEntities().stream()
|
return getTextEntities().stream()
|
||||||
.filter(IEntity::active)
|
.filter(IEntity::active)
|
||||||
.filter(TextEntity::validEntityType);
|
.filter(SemanticEntity::validEntityType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -638,18 +661,18 @@ public interface SemanticNode {
|
|||||||
* This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity.
|
* This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity.
|
||||||
* It sets the fields accordingly and recursively calls this function on all its children.
|
* It sets the fields accordingly and recursively calls this function on all its children.
|
||||||
*
|
*
|
||||||
* @param textEntity RedactionEntity, which is being inserted into the graph
|
* @param entity RedactionEntity, which is being inserted into the graph
|
||||||
*/
|
*/
|
||||||
default void addThisToEntityIfIntersects(TextEntity textEntity) {
|
default void addThisToEntityIfIntersects(SemanticEntity entity) {
|
||||||
|
|
||||||
TextBlock textBlock = getTextBlock();
|
TextBlock textBlock = getTextBlock();
|
||||||
if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
|
if (textBlock.getTextRange().intersects(entity.getTextRange())) {
|
||||||
if (textBlock.containsTextRange(textEntity.getTextRange())) {
|
if (textBlock.containsTextRange(entity.getTextRange())) {
|
||||||
textEntity.setDeepestFullyContainingNode(this);
|
entity.setDeepestFullyContainingNode(this);
|
||||||
}
|
}
|
||||||
textEntity.addIntersectingNode(this);
|
entity.addIntersectingNode(this);
|
||||||
getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange())
|
getDocumentTree().findIntersectingChildNodes(getTreeId(), entity.getTextRange())
|
||||||
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
|
.forEach(node -> node.addThisToEntityIfIntersects(entity));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -838,4 +861,17 @@ public interface SemanticNode {
|
|||||||
return pages.size() == 1 && pages.contains(page);
|
return pages.size() == 1 && pages.contains(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
default String buildId(TextRange textRange, String type, EntityType entityType) {
|
||||||
|
|
||||||
|
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = getPositionsPerPage(textRange);
|
||||||
|
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
|
||||||
|
rectanglesPerLinePerPage.values()
|
||||||
|
.stream()
|
||||||
|
.flatMap(Collection::stream)
|
||||||
|
.toList(),
|
||||||
|
type,
|
||||||
|
entityType.name());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -15,6 +15,7 @@ import java.util.stream.Stream;
|
|||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||||
@ -48,7 +49,7 @@ public class Table implements SemanticNode {
|
|||||||
TextBlock textBlock;
|
TextBlock textBlock;
|
||||||
|
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
Set<TextEntity> entities = new HashSet<>();
|
Set<SemanticEntity> entities = new HashSet<>();
|
||||||
|
|
||||||
Map<Page, Rectangle2D> bBoxCache;
|
Map<Page, Rectangle2D> bBoxCache;
|
||||||
|
|
||||||
@ -109,7 +110,7 @@ public class Table implements SemanticNode {
|
|||||||
.toList();
|
.toList();
|
||||||
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
|
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
|
||||||
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
|
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
|
||||||
.map(TableCell::getEntities)
|
.map(TableCell::getTextEntities)
|
||||||
.flatMap(Collection::stream);
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,7 +129,7 @@ public class Table implements SemanticNode {
|
|||||||
.toList();
|
.toList();
|
||||||
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
|
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
|
||||||
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
|
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
|
||||||
.map(TableCell::getEntities)
|
.map(TableCell::getTextEntities)
|
||||||
.flatMap(Collection::stream);
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -146,7 +147,7 @@ public class Table implements SemanticNode {
|
|||||||
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
||||||
.anyMatch(types::contains))
|
.anyMatch(types::contains))
|
||||||
.flatMap(this::streamRow)
|
.flatMap(this::streamRow)
|
||||||
.map(TableCell::getEntities)
|
.map(TableCell::getTextEntities)
|
||||||
.flatMap(Collection::stream);
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -167,7 +168,7 @@ public class Table implements SemanticNode {
|
|||||||
return entityTypes.containsAll(types);
|
return entityTypes.containsAll(types);
|
||||||
})
|
})
|
||||||
.flatMap(this::streamRow)
|
.flatMap(this::streamRow)
|
||||||
.map(TableCell::getEntities)
|
.map(TableCell::getTextEntities)
|
||||||
.flatMap(Collection::stream);
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,7 +186,7 @@ public class Table implements SemanticNode {
|
|||||||
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
|
||||||
.noneMatch(types::contains))
|
.noneMatch(types::contains))
|
||||||
.flatMap(this::streamRow)
|
.flatMap(this::streamRow)
|
||||||
.map(TableCell::getEntities)
|
.map(TableCell::getTextEntities)
|
||||||
.flatMap(Collection::stream);
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -426,4 +427,30 @@ public class Table implements SemanticNode {
|
|||||||
visitor.visit(this);
|
visitor.visit(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String asCsv() {
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
|
for (int row = 0; row < numberOfRows; row++) {
|
||||||
|
for (int col = 0; col < numberOfCols; col++) {
|
||||||
|
TableCell cell = getCell(row, col);
|
||||||
|
String cellText = cell.getTextBlock().getSearchText().replaceAll("\\r?\\n", " ").trim();
|
||||||
|
|
||||||
|
if (cellText.contains(",") || cellText.contains("\"")) {
|
||||||
|
cellText = "\"" + cellText.replace("\"", "\"\"") + "\"";
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.append(cellText);
|
||||||
|
|
||||||
|
if (col < numberOfCols - 1) {
|
||||||
|
sb.append(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.append("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,6 +5,8 @@ import java.util.Set;
|
|||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
|
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
@ -50,15 +52,15 @@ public class EntityCreationUtility {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void addToPages(TextEntity entity) {
|
public void addToPages(SemanticEntity entity) {
|
||||||
|
|
||||||
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
|
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
|
||||||
entity.getPages().addAll(pages);
|
entity.getPages().addAll(pages);
|
||||||
pages.forEach(page -> page.getEntities().add(entity));
|
pages.forEach(page -> page.getSemanticEntities().add(entity));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void addEntityToNodeEntitySets(TextEntity entity) {
|
public void addEntityToNodeEntitySets(SemanticEntity entity) {
|
||||||
|
|
||||||
entity.getIntersectingNodes()
|
entity.getIntersectingNodes()
|
||||||
.forEach(node -> node.getEntities().add(entity));
|
.forEach(node -> node.getEntities().add(entity));
|
||||||
|
|||||||
@ -20,12 +20,15 @@ public class EntityEnrichmentService {
|
|||||||
|
|
||||||
|
|
||||||
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
|
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
|
||||||
|
|
||||||
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
|
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
|
||||||
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
|
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
|
||||||
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
|
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String findTextAfter(int index, TextBlock textBlock) {
|
private String findTextAfter(int index, TextBlock textBlock) {
|
||||||
|
|
||||||
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
|
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
|
||||||
String textAfter = textBlock.subSequence(index, endOffset).toString();
|
String textAfter = textBlock.subSequence(index, endOffset).toString();
|
||||||
if (!textAfter.isBlank()) {
|
if (!textAfter.isBlank()) {
|
||||||
@ -38,7 +41,9 @@ public class EntityEnrichmentService {
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String findTextBefore(int index, TextBlock textBlock) {
|
private String findTextBefore(int index, TextBlock textBlock) {
|
||||||
|
|
||||||
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
|
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
|
||||||
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
|
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
|
||||||
if (!textBefore.isBlank()) {
|
if (!textBefore.isBlank()) {
|
||||||
@ -51,13 +56,17 @@ public class EntityEnrichmentService {
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<String> splitToWordsAndRemoveEmptyWords(String text) {
|
private List<String> splitToWordsAndRemoveEmptyWords(String text) {
|
||||||
|
|
||||||
return Arrays.stream(text.split(" "))
|
return Arrays.stream(text.split(" "))
|
||||||
.filter(word -> !Objects.equals("", word))
|
.filter(word -> !Objects.equals("", word))
|
||||||
.toList();
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
|
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (String word : words) {
|
for (String word : words) {
|
||||||
sb.append(word).append(" ");
|
sb.append(word).append(" ");
|
||||||
@ -66,7 +75,9 @@ public class EntityEnrichmentService {
|
|||||||
return endWithSpace ? result + " " : result;
|
return endWithSpace ? result + " " : result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
|
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (String word : words) {
|
for (String word : words) {
|
||||||
sb.append(word).append(" ");
|
sb.append(word).append(" ");
|
||||||
|
|||||||
@ -26,6 +26,9 @@ public class Component {
|
|||||||
|
|
||||||
List<Entity> references;
|
List<Entity> references;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
ComponentFormat componentFormat = ComponentFormat.TEXT;
|
||||||
|
|
||||||
|
|
||||||
public boolean addReference(Entity entity) {
|
public boolean addReference(Entity entity) {
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,7 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.model.component;
|
||||||
|
|
||||||
|
/**
 * Format of a component's value.
 */
public enum ComponentFormat {

    /** Plain text value; used as the default format of a component. */
    TEXT,

    /** Value is comma-separated table content. */
    CSV

}
|
||||||
@ -36,6 +36,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||||
@ -70,11 +72,11 @@ public class EntityLogCreatorService {
|
|||||||
ObservationRegistry observationRegistry;
|
ObservationRegistry observationRegistry;
|
||||||
|
|
||||||
|
|
||||||
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(TextEntity textEntity) {
|
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(SemanticEntity semanticEntity) {
|
||||||
|
|
||||||
return !(textEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|
return !(semanticEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|
||||||
|| textEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|
|| semanticEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|
||||||
|| textEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
|
|| semanticEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -175,7 +177,7 @@ public class EntityLogCreatorService {
|
|||||||
|
|
||||||
List<EntityLogEntry> entries = new ArrayList<>();
|
List<EntityLogEntry> entries = new ArrayList<>();
|
||||||
|
|
||||||
List<TextEntity> textEntities = document.getEntities()
|
List<SemanticEntity> semanticEntities = document.getEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(entity -> !entity.getValue().isEmpty())
|
.filter(entity -> !entity.getValue().isEmpty())
|
||||||
.filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval)
|
.filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval)
|
||||||
@ -190,7 +192,7 @@ public class EntityLogCreatorService {
|
|||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
List<String> allIds = new ArrayList<>();
|
List<String> allIds = new ArrayList<>();
|
||||||
allIds.addAll(textEntities.stream()
|
allIds.addAll(semanticEntities.stream()
|
||||||
.flatMap(entity -> entity.getPositionsOnPagePerPage()
|
.flatMap(entity -> entity.getPositionsOnPagePerPage()
|
||||||
.stream()
|
.stream()
|
||||||
.map(PositionOnPage::getId))
|
.map(PositionOnPage::getId))
|
||||||
@ -204,7 +206,7 @@ public class EntityLogCreatorService {
|
|||||||
|
|
||||||
Map<String, List<ManualChange>> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds);
|
Map<String, List<ManualChange>> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds);
|
||||||
|
|
||||||
textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
|
semanticEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
|
||||||
|
|
||||||
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode,
|
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode,
|
||||||
dossierTemplateId,
|
dossierTemplateId,
|
||||||
@ -219,19 +221,19 @@ public class EntityLogCreatorService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
|
public List<EntityLogEntry> toEntityLogEntries(SemanticEntity semanticEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
|
||||||
|
|
||||||
List<EntityLogEntry> entityLogEntries = new ArrayList<>();
|
List<EntityLogEntry> entityLogEntries = new ArrayList<>();
|
||||||
|
|
||||||
// split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities
|
// split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities
|
||||||
for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) {
|
for (PositionOnPage positionOnPage : semanticEntity.getPositionsOnPagePerPage()) {
|
||||||
|
|
||||||
List<Position> rectanglesPerLine = positionOnPage.getRectanglePerLine()
|
List<Position> rectanglesPerLine = positionOnPage.getRectanglePerLine()
|
||||||
.stream()
|
.stream()
|
||||||
.map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber()))
|
.map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber()))
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
EntityLogEntry entityLogEntry = createEntityLogEntry(textEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
|
EntityLogEntry entityLogEntry = createEntityLogEntry(semanticEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
|
||||||
|
|
||||||
// set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages
|
// set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages
|
||||||
entityLogEntry.setId(positionOnPage.getId());
|
entityLogEntry.setId(positionOnPage.getId());
|
||||||
@ -317,12 +319,24 @@ public class EntityLogCreatorService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private EntityLogEntry createEntityLogEntry(SemanticEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||||
|
|
||||||
|
if (entity instanceof TextEntity textEntity) {
|
||||||
|
return createEntityLogEntry(textEntity, analysisNumber, existingManualChanges);
|
||||||
|
} else if (entity instanceof TableEntity tableEntity) {
|
||||||
|
return createEntityLogEntry(tableEntity, analysisNumber, existingManualChanges);
|
||||||
|
} else {
|
||||||
|
throw new UnsupportedOperationException("Wrong semantic entity type given for entity log entry creation!!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||||
|
|
||||||
Set<String> referenceIds = new HashSet<>();
|
Set<String> referenceIds = new HashSet<>();
|
||||||
entity.references()
|
entity.references()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(TextEntity::active)
|
.filter(SemanticEntity::active)
|
||||||
.forEach(ref -> ref.getPositionsOnPagePerPage()
|
.forEach(ref -> ref.getPositionsOnPagePerPage()
|
||||||
.forEach(pos -> referenceIds.add(pos.getId())));
|
.forEach(pos -> referenceIds.add(pos.getId())));
|
||||||
|
|
||||||
@ -365,7 +379,42 @@ public class EntityLogCreatorService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) {
|
private EntityLogEntry createEntityLogEntry(TableEntity tableEntity, int analysisNumber, List<ManualChange> existingManualChanges) {
|
||||||
|
|
||||||
|
Set<String> referenceIds = new HashSet<>();
|
||||||
|
tableEntity.references()
|
||||||
|
.stream()
|
||||||
|
.filter(IEntity::applied)
|
||||||
|
.forEach(ref -> ref.getPositionsOnPagePerPage()
|
||||||
|
.forEach(pos -> referenceIds.add(pos.getId())));
|
||||||
|
|
||||||
|
EntryType entryType = buildEntryType(tableEntity);
|
||||||
|
|
||||||
|
List<ManualChange> allManualChanges = ManualChangeFactory.toLocalManualChangeList(tableEntity.getManualOverwrite().getManualChangeLog(), true, analysisNumber);
|
||||||
|
|
||||||
|
return EntityLogEntry.builder()
|
||||||
|
.reason(tableEntity.buildReason())
|
||||||
|
.legalBasis(tableEntity.legalBasis())
|
||||||
|
.value(tableEntity.getValue())
|
||||||
|
.type(tableEntity.type())
|
||||||
|
.section(tableEntity.getManualOverwrite().getSection()
|
||||||
|
.orElse(this.buildSectionString(tableEntity.getDeepestFullyContainingNode())))
|
||||||
|
.containingNodeId(tableEntity.getDeepestFullyContainingNode().getTreeId())
|
||||||
|
.closestHeadline(tableEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText())
|
||||||
|
.matchedRule(tableEntity.getMatchedRule().getRuleIdentifier().toString())
|
||||||
|
.startOffset(tableEntity.getTextRange().start())
|
||||||
|
.endOffset(tableEntity.getTextRange().end())
|
||||||
|
// .engines(getEngines(tableEntity.getEngines(), tableEntity.getManualOverwrite()))
|
||||||
|
.reference(referenceIds)
|
||||||
|
.manualChanges(ManualChangesUtils.mergeManualChanges(existingManualChanges, allManualChanges))
|
||||||
|
.state(buildEntryState(tableEntity))
|
||||||
|
.entryType(entryType)
|
||||||
|
.paragraphPageIdx(determinePageParagraphIndex(tableEntity, entryType))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private int determinePageParagraphIndex(SemanticEntity textEntity, EntryType entryType) {
|
||||||
|
|
||||||
int pageParagraphIdx = -1;
|
int pageParagraphIdx = -1;
|
||||||
|
|
||||||
@ -414,7 +463,7 @@ public class EntityLogCreatorService {
|
|||||||
|
|
||||||
public static EntryType buildEntryType(IEntity entity) {
|
public static EntryType buildEntryType(IEntity entity) {
|
||||||
|
|
||||||
if (entity instanceof TextEntity textEntity) {
|
if (entity instanceof SemanticEntity textEntity) {
|
||||||
return getEntryType(textEntity.getEntityType());
|
return getEntryType(textEntity.getEntityType());
|
||||||
} else if (entity instanceof PrecursorEntity precursorEntity) {
|
} else if (entity instanceof PrecursorEntity precursorEntity) {
|
||||||
if (precursorEntity.isRectangle()) {
|
if (precursorEntity.isRectangle()) {
|
||||||
|
|||||||
@ -18,8 +18,6 @@ import com.google.common.collect.Sets;
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
@ -146,9 +144,9 @@ public class ManualChangesApplicationService {
|
|||||||
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
|
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
|
||||||
|
|
||||||
Sets.difference(currentIntersectingPages, newIntersectingPages)
|
Sets.difference(currentIntersectingPages, newIntersectingPages)
|
||||||
.forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
|
.forEach(removedPage -> removedPage.getSemanticEntities().remove(entityToBeResized));
|
||||||
Sets.difference(newIntersectingPages, currentIntersectingPages)
|
Sets.difference(newIntersectingPages, currentIntersectingPages)
|
||||||
.forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
|
.forEach(addedPage -> addedPage.getSemanticEntities().add(entityToBeResized));
|
||||||
|
|
||||||
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
|
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
|
||||||
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));
|
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));
|
||||||
|
|||||||
@ -102,9 +102,9 @@ public class UnprocessedChangesService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
document.getEntities()
|
document.getEntities()
|
||||||
.forEach(textEntity -> {
|
.forEach(entity -> {
|
||||||
Set<String> processedIds = new HashSet<>();
|
Set<String> processedIds = new HashSet<>();
|
||||||
for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) {
|
for (var positionsOnPerPage : entity.getPositionsOnPagePerPage()) {
|
||||||
if (processedIds.contains(positionsOnPerPage.getId())) {
|
if (processedIds.contains(positionsOnPerPage.getId())) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -113,17 +113,18 @@ public class UnprocessedChangesService {
|
|||||||
.stream()
|
.stream()
|
||||||
.map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber()))
|
.map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
unprocessedManualEntities.add(UnprocessedManualEntity.builder()
|
UnprocessedManualEntity.UnprocessedManualEntityBuilder builder = UnprocessedManualEntity.builder()
|
||||||
.annotationId(allAnnotationIds.stream()
|
.annotationId(allAnnotationIds.stream()
|
||||||
.filter(textEntity::matchesAnnotationId)
|
.filter(entity::matchesAnnotationId)
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.orElse(""))
|
.orElse(""))
|
||||||
.textBefore(textEntity.getTextBefore())
|
.section(entity.getManualOverwrite().getSection()
|
||||||
.textAfter(textEntity.getTextAfter())
|
.orElse(entity.getDeepestFullyContainingNode().toString()))
|
||||||
.section(textEntity.getManualOverwrite().getSection()
|
.positions(positions);
|
||||||
.orElse(textEntity.getDeepestFullyContainingNode().toString()))
|
if (entity instanceof TextEntity textEntity) {
|
||||||
.positions(positions)
|
builder.textBefore(textEntity.getTextBefore()).textAfter(textEntity.getTextAfter());
|
||||||
.build());
|
}
|
||||||
|
unprocessedManualEntities.add(builder.build());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@ -23,8 +23,11 @@ import java.util.stream.Stream;
|
|||||||
import org.kie.api.runtime.KieSession;
|
import org.kie.api.runtime.KieSession;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.component.ComponentFormat;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||||
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
|
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
|
||||||
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
||||||
@ -98,6 +101,22 @@ public class ComponentCreationService {
|
|||||||
.value(value)
|
.value(value)
|
||||||
.valueDescription(valueDescription)
|
.valueDescription(valueDescription)
|
||||||
.references(new LinkedList<>(references))
|
.references(new LinkedList<>(references))
|
||||||
|
.componentFormat(ComponentFormat.TEXT)
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void create(String ruleIdentifier, String name, String value, String valueDescription, Collection<Entity> references, ComponentFormat componentFormat) {
|
||||||
|
|
||||||
|
referencedEntities.addAll(references);
|
||||||
|
|
||||||
|
kieSession.insert(Component.builder()
|
||||||
|
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
|
||||||
|
.name(name)
|
||||||
|
.value(value)
|
||||||
|
.valueDescription(valueDescription)
|
||||||
|
.references(new LinkedList<>(references))
|
||||||
|
.componentFormat(componentFormat)
|
||||||
.build());
|
.build());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -376,6 +395,27 @@ public class ComponentCreationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void createComponentForTables(String ruleIdentifier, Collection<Entity> entities) {
|
||||||
|
|
||||||
|
entities.stream()
|
||||||
|
.filter(entity -> !referencedEntities.contains(entity))
|
||||||
|
.sorted(EntityComparators.first())
|
||||||
|
.forEach(entity -> {
|
||||||
|
String value = entity.getValue();
|
||||||
|
ComponentFormat componentFormat = ComponentFormat.TEXT;
|
||||||
|
SemanticNode containingNode = entity.getContainingNode();
|
||||||
|
|
||||||
|
if (containingNode instanceof TableCell cell) { // case for tables with 0 rows and 0 columns
|
||||||
|
value = cell.getTextBlock().getSearchText();
|
||||||
|
} else if (containingNode instanceof Table table) {
|
||||||
|
value = table.asCsv();
|
||||||
|
componentFormat = ComponentFormat.CSV;
|
||||||
|
}
|
||||||
|
create(ruleIdentifier, entity.getType(), value, "Table Entity", List.of(entity), componentFormat);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is.
|
* Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is.
|
||||||
*
|
*
|
||||||
|
|||||||
@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
@ -907,6 +909,34 @@ public class EntityCreationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a table entity based on the document table.
|
||||||
|
*
|
||||||
|
* @param table The table to base the table entity on.
|
||||||
|
* @param type The type of entity to create.
|
||||||
|
* @param entityType The entity's classification.
|
||||||
|
* @return An optional containing the created {@link TableEntity}.
|
||||||
|
*/
|
||||||
|
public Optional<TableEntity> bySemanticNode(Table table, String type, EntityType entityType) {
|
||||||
|
|
||||||
|
TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType);
|
||||||
|
|
||||||
|
Optional<TableEntity> optionalTableEntity = table.getDocumentTree().getRoot().getNode().getEntities()
|
||||||
|
.stream()
|
||||||
|
.filter(e -> e instanceof TableEntity && e.equals(tableEntity) && e.type().equals(type))
|
||||||
|
.map(e -> (TableEntity) e)
|
||||||
|
.findAny();
|
||||||
|
if (optionalTableEntity.isPresent()) {
|
||||||
|
return optionalTableEntity;
|
||||||
|
}
|
||||||
|
|
||||||
|
addListenerToEntity(tableEntity);
|
||||||
|
table.getDocumentTree().addEntityToGraph(tableEntity);
|
||||||
|
|
||||||
|
return Optional.of(tableEntity);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expands a text entity's start boundary based on a regex pattern match.
|
* Expands a text entity's start boundary based on a regex pattern match.
|
||||||
*
|
*
|
||||||
@ -978,7 +1008,8 @@ public class EntityCreationService {
|
|||||||
if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) {
|
if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) {
|
||||||
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
|
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(e -> e.equals(entity) && e.type().equals(type))
|
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
|
||||||
|
.map(e -> (TextEntity) e)
|
||||||
.peek(e -> e.addEngines(engines))
|
.peek(e -> e.addEngines(engines))
|
||||||
.findAny();
|
.findAny();
|
||||||
if (optionalTextEntity.isEmpty()) {
|
if (optionalTextEntity.isEmpty()) {
|
||||||
@ -1419,7 +1450,7 @@ public class EntityCreationService {
|
|||||||
.filter(e -> e.equals(entity))//
|
.filter(e -> e.equals(entity))//
|
||||||
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
|
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
|
||||||
.findAny()
|
.findAny()
|
||||||
.ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node));
|
.ifPresent(e -> addDuplicateEntityToGraph((TextEntity) e, entity.getTextRange(), node));
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
addListenerToEntity(entity);
|
addListenerToEntity(entity);
|
||||||
@ -1469,19 +1500,17 @@ public class EntityCreationService {
|
|||||||
}
|
}
|
||||||
additionalIntersectingNode.getEntities().add(entityToDuplicate);
|
additionalIntersectingNode.getEntities().add(entityToDuplicate);
|
||||||
additionalIntersectingNode.getPages(newTextRange)
|
additionalIntersectingNode.getPages(newTextRange)
|
||||||
.forEach(page -> page.getEntities().add(entityToDuplicate));
|
.forEach(page -> page.getSemanticEntities().add(entityToDuplicate));
|
||||||
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
|
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addListenerToEntity(TextEntity textEntity) {
|
private void addListenerToEntity(IEntity textEntity) {
|
||||||
|
|
||||||
if(kieSessionUpdater != null) {
|
if (kieSessionUpdater != null) {
|
||||||
textEntity.addEntityEventListener(kieSessionUpdater);
|
textEntity.addEntityEventListener(kieSessionUpdater);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -162,7 +162,7 @@ public class EntityFromPrecursorCreationService {
|
|||||||
correctEntity.getIntersectingNodes()
|
correctEntity.getIntersectingNodes()
|
||||||
.forEach(n -> n.getEntities().add(correctEntity));
|
.forEach(n -> n.getEntities().add(correctEntity));
|
||||||
correctEntity.getPages()
|
correctEntity.getPages()
|
||||||
.forEach(page -> page.getEntities().add(correctEntity));
|
.forEach(page -> page.getSemanticEntities().add(correctEntity));
|
||||||
|
|
||||||
correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList());
|
correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList());
|
||||||
correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry());
|
correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry());
|
||||||
|
|||||||
@ -28,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
|
|||||||
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
|
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||||
@ -128,9 +129,9 @@ public class EntityDroolsExecutionService {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
for (TextEntity textEntity : document.getEntities()) {
|
for (SemanticEntity semanticEntity : document.getEntities()) {
|
||||||
textEntity.addEntityEventListener(kieSessionUpdater);
|
semanticEntity.addEntityEventListener(kieSessionUpdater);
|
||||||
textEntity.notifyEntityInserted();
|
semanticEntity.notifyEntityInserted();
|
||||||
}
|
}
|
||||||
|
|
||||||
document.getPages()
|
document.getPages()
|
||||||
|
|||||||
@ -9,6 +9,8 @@ import org.kie.api.runtime.rule.FactHandle;
|
|||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
@ -50,14 +52,14 @@ public class KieSessionUpdater implements EntityEventListener {
|
|||||||
|
|
||||||
private void handleOnEntityEvent(IEntity entity, Consumer<Object> consumer) {
|
private void handleOnEntityEvent(IEntity entity, Consumer<Object> consumer) {
|
||||||
|
|
||||||
if (entity instanceof TextEntity textEntity) {
|
if (entity instanceof SemanticEntity semanticEntity) {
|
||||||
updateIntersectingNodes(textEntity);
|
updateIntersectingNodes(semanticEntity);
|
||||||
textEntity.getRelations().values()
|
semanticEntity.getRelations().values()
|
||||||
.stream()
|
.stream()
|
||||||
.flatMap(Collection::stream)
|
.flatMap(Collection::stream)
|
||||||
.forEach(consumer);
|
.forEach(consumer);
|
||||||
textEntity.getRelations().keySet()
|
semanticEntity.getRelations().keySet()
|
||||||
.forEach(k -> k.getRelations().getOrDefault(textEntity, Collections.emptySet())
|
.forEach(k -> k.getRelations().getOrDefault(semanticEntity, Collections.emptySet())
|
||||||
.forEach(consumer));
|
.forEach(consumer));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,9 +73,9 @@ public class KieSessionUpdater implements EntityEventListener {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void updateIntersectingNodes(TextEntity textEntity) {
|
private void updateIntersectingNodes(SemanticEntity semanticEntity) {
|
||||||
|
|
||||||
textEntity.getIntersectingNodes()
|
semanticEntity.getIntersectingNodes()
|
||||||
.forEach(this::updateFactIfPresent);
|
.forEach(this::updateFactIfPresent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -25,8 +25,7 @@ fforesight:
|
|||||||
ignored-endpoints: [ '/redaction-gateway-v1', '/actuator/health/**',"/api/rules-logging/rulesocket","/api/rules-logging/rulesocket/**", '/redaction-gateway-v1/async/download/with-ott/**',
|
ignored-endpoints: [ '/redaction-gateway-v1', '/actuator/health/**',"/api/rules-logging/rulesocket","/api/rules-logging/rulesocket/**", '/redaction-gateway-v1/async/download/with-ott/**',
|
||||||
'/internal-api/**', '/redaction-gateway-v1/docs/swagger-ui', '/rules/test',
|
'/internal-api/**', '/redaction-gateway-v1/docs/swagger-ui', '/rules/test',
|
||||||
'/redaction-gateway-v1/docs/**','/redaction-gateway-v1/docs',
|
'/redaction-gateway-v1/docs/**','/redaction-gateway-v1/docs',
|
||||||
'/api', '/api/','/api/docs/**','/api/docs','/api/docs/swagger-ui',
|
'/api', '/api/','/api/docs/**','/api/docs','/api/docs/swagger-ui' ]
|
||||||
'/actuator/prometheus' ]
|
|
||||||
|
|
||||||
spring:
|
spring:
|
||||||
application:
|
application:
|
||||||
|
|||||||
@ -37,6 +37,8 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
|
|||||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||||
|
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
@ExtendWith(SpringExtension.class)
|
@ExtendWith(SpringExtension.class)
|
||||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
|
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
|
||||||
public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
||||||
@ -262,6 +264,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@SneakyThrows
|
||||||
public void testDoseMortalityExtraction() {
|
public void testDoseMortalityExtraction() {
|
||||||
|
|
||||||
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
|
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
|
||||||
@ -269,9 +272,18 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
|||||||
System.out.println("Start Full integration test");
|
System.out.println("Start Full integration test");
|
||||||
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||||
System.out.println("Finished structure analysis");
|
System.out.println("Finished structure analysis");
|
||||||
analyzeService.analyze(request);
|
AnalyzeResult analyze = analyzeService.analyze(request);
|
||||||
System.out.println("Finished analysis");
|
System.out.println("Finished analysis");
|
||||||
|
|
||||||
|
|
||||||
|
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||||
|
|
||||||
|
String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
|
||||||
|
|
||||||
|
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||||
|
fileOutputStream.write(annotateResponse.getDocument());
|
||||||
|
}
|
||||||
|
|
||||||
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||||
var doseMortality = componentLog.getComponentLogEntries()
|
var doseMortality = componentLog.getComponentLogEntries()
|
||||||
.stream()
|
.stream()
|
||||||
|
|||||||
@ -0,0 +1,90 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.springframework.boot.test.context.SpringBootTest;
|
||||||
|
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||||
|
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
|
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||||
|
|
||||||
|
@ExtendWith(SpringExtension.class)
|
||||||
|
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
|
||||||
|
class TableComponentsIntegrationTest extends AbstractRedactionIntegrationTest {
|
||||||
|
|
||||||
|
private static final String RULES = loadFromClassPath("drools/documine_flora_table_test.drl");
|
||||||
|
private static final String COMPONENT_RULES = loadFromClassPath("drools/documine_flora_table_test_components.drl");
|
||||||
|
private static final String DATE_FORMATS = loadFromClassPath("dateFormats.txt");
|
||||||
|
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void stubClients() {
|
||||||
|
|
||||||
|
TenantContext.setTenantId("redaction");
|
||||||
|
|
||||||
|
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
|
||||||
|
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
|
||||||
|
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(System.currentTimeMillis());
|
||||||
|
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(JSONPrimitive.of(COMPONENT_RULES));
|
||||||
|
when(dateFormatsClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(System.currentTimeMillis());
|
||||||
|
when(dateFormatsClient.getDateFormats(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(DATE_FORMATS));
|
||||||
|
|
||||||
|
loadDictionaryForTest();
|
||||||
|
loadTypeForTest();
|
||||||
|
loadNerForTest();
|
||||||
|
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||||
|
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, null, true)).thenReturn(getTemplateDictionaryTypeResponse());
|
||||||
|
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
|
||||||
|
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, null, true)).thenReturn(getDossierDictionaryTypeResponse());
|
||||||
|
mockDictionaryCalls(null);
|
||||||
|
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testTableComponentsCreation() throws IOException {
|
||||||
|
|
||||||
|
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
|
||||||
|
|
||||||
|
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||||
|
|
||||||
|
analyzeService.analyze(request);
|
||||||
|
|
||||||
|
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
|
||||||
|
String outputFileName = OsUtils.getTemporaryDirectory() + "/TableComponents.pdf";
|
||||||
|
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||||
|
fileOutputStream.write(annotateResponse.getDocument());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||||
|
boolean tableComponentFound = componentLog.getComponentLogEntries()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(entry -> "Table".equals(entry.getName()));
|
||||||
|
|
||||||
|
assertTrue(tableComponentFound, "Expected table component 'Table' to be present in the component log");
|
||||||
|
|
||||||
|
var entityLog = redactionStorageService.getEntityLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||||
|
boolean tableEntityFound = entityLog.getEntityLogEntry()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(entry -> entry.getMatchedRule() != null && entry.getMatchedRule().contains("T.0.0"));
|
||||||
|
|
||||||
|
assertTrue(tableEntityFound, "Expected table entity creation ('T.0.0') to be present in the entity log");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -348,11 +348,11 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
|
|||||||
.orElseThrow();
|
.orElseThrow();
|
||||||
|
|
||||||
assertEquals(textEntity.getValue(), searchTerm);
|
assertEquals(textEntity.getValue(), searchTerm);
|
||||||
assertTrue(pageNode.getEntities().contains(textEntity));
|
assertTrue(pageNode.getSemanticEntities().contains(textEntity));
|
||||||
assertTrue(document.getPages()
|
assertTrue(document.getPages()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(page -> page != pageNode)
|
.filter(page -> page != pageNode)
|
||||||
.noneMatch(page -> page.getEntities().contains(textEntity)));
|
.noneMatch(page -> page.getSemanticEntities().contains(textEntity)));
|
||||||
assertTrue(textEntity.getPages().contains(pageNode));
|
assertTrue(textEntity.getPages().contains(pageNode));
|
||||||
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
|
||||||
assertTrue(textEntity.getIntersectingNodes()
|
assertTrue(textEntity.getIntersectingNodes()
|
||||||
|
|||||||
@ -30,6 +30,7 @@ import com.iqser.red.service.redaction.v1.server.logger.Context;
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch;
|
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||||
@ -200,14 +201,14 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
|||||||
System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns);
|
System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns);
|
||||||
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
|
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
|
||||||
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
|
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
|
||||||
for (TextEntity entity : document.getEntities()) {
|
for (TextEntity entity : document.getTextEntities()) {
|
||||||
var foundEntity = foundEntities.stream()
|
var foundEntity = foundEntities.stream()
|
||||||
.filter(f -> f.getId().equals(entity.getId()))
|
.filter(f -> f.getId().equals(entity.getId()))
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.get();
|
.get();
|
||||||
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
|
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
|
||||||
}
|
}
|
||||||
assert document.getEntities()
|
assert document.getTextEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
|
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
|
||||||
assert foundEntities.stream()
|
assert foundEntities.stream()
|
||||||
@ -225,10 +226,10 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
|||||||
try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) {
|
try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) {
|
||||||
|
|
||||||
for (Page page : document.getPages()) {
|
for (Page page : document.getPages()) {
|
||||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(entityNode -> !entityNode.removed())
|
.filter(entityNode -> !entityNode.removed())
|
||||||
.filter(TextEntity::applied)
|
.filter(SemanticEntity::applied)
|
||||||
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
|
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
|
||||||
.stream())
|
.stream())
|
||||||
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
.filter(entityPosition -> entityPosition.getPage().equals(page))
|
||||||
@ -241,7 +242,7 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (Page page : document.getPages()) {
|
for (Page page : document.getPages()) {
|
||||||
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
|
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(entityNode -> !entityNode.removed())
|
.filter(entityNode -> !entityNode.removed())
|
||||||
.filter(entityNode -> !entityNode.applied())
|
.filter(entityNode -> !entityNode.applied())
|
||||||
|
|||||||
@ -83,7 +83,7 @@ public class TableTest extends BuildDocumentIntegrationTest {
|
|||||||
file);
|
file);
|
||||||
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
||||||
|
|
||||||
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA);
|
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getTextEntities(), Color.MAGENTA);
|
||||||
|
|
||||||
viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup));
|
viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,7 +22,7 @@ public class Cbi11Test extends RulesIntegrationTest {
|
|||||||
|
|
||||||
doAnalysis(document, Collections.emptyList());
|
doAnalysis(document, Collections.emptyList());
|
||||||
|
|
||||||
List<String> authorNames = document.getEntities()
|
List<String> authorNames = document.getTextEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.map(Dictionary::splitIntoAuthorNames)
|
.map(Dictionary::splitIntoAuthorNames)
|
||||||
.flatMap(Collection::stream)
|
.flatMap(Collection::stream)
|
||||||
|
|||||||
@ -9,6 +9,7 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||||
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
|
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
|
||||||
@ -46,9 +47,9 @@ public class EntityVisualizationUtility {
|
|||||||
|
|
||||||
private static List<ColoredRectangle> getEntityRectangles(Color color, Page page) {
|
private static List<ColoredRectangle> getEntityRectangles(Color color, Page page) {
|
||||||
|
|
||||||
return page.getEntities()
|
return page.getSemanticEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.map(TextEntity::getPositionsOnPagePerPage)
|
.map(SemanticEntity::getPositionsOnPagePerPage)
|
||||||
.flatMap(Collection::stream)
|
.flatMap(Collection::stream)
|
||||||
.filter(p -> p.getPage().equals(page))
|
.filter(p -> p.getPage().equals(page))
|
||||||
.map(PositionOnPage::getRectanglePerLine)
|
.map(PositionOnPage::getRectanglePerLine)
|
||||||
|
|||||||
@ -460,7 +460,6 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
|
|||||||
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
|
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
rule "DefaultComponents.999.0: Create components for all unmapped entities."
|
rule "DefaultComponents.999.0: Create components for all unmapped entities."
|
||||||
salience -999
|
salience -999
|
||||||
when
|
when
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,492 @@
|
|||||||
|
package drools
|
||||||
|
|
||||||
|
import static java.lang.String.format;
|
||||||
|
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.anyMatch;
|
||||||
|
import static com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility.exactMatch;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService;
|
||||||
|
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
|
||||||
|
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||||
|
|
||||||
|
global ComponentCreationService componentCreationService
|
||||||
|
global ComponentMappingService componentMappingService
|
||||||
|
global RulesLogger logger
|
||||||
|
|
||||||
|
|
||||||
|
//------------------------------------ queries ------------------------------------
|
||||||
|
|
||||||
|
// Query returning every FileAttribute fact, unfiltered, bound as $fileAttribute.
query "getFileAttributes"
|
||||||
|
$fileAttribute: FileAttribute()
|
||||||
|
end
|
||||||
|
|
||||||
|
// Query returning every Component fact, unfiltered, bound as $component.
query "getComponents"
|
||||||
|
$component: Component()
|
||||||
|
end
|
||||||
|
|
||||||
|
//------------------------------------ table rules ------------------------------------
|
||||||
|
|
||||||
|
// Collects all entities of type "Table" into one list and passes that list
// to createComponentForTables.
rule "TableComponents.0.0: Create components for all table entities."
    when
        $tableEntities: List() from collect(Entity(type == "Table"))
    then
        componentCreationService.createComponentForTables("TableComponents.0.0", $tableEntities);
end
|
||||||
|
|
||||||
|
//------------------------------------ Default Components rules ------------------------------------
|
||||||
|
|
||||||
|
// Collects all "title" entities and lets firstOrElse fill Study_Title,
// with the empty string as the fallback value.
rule "StudyTitle.0.0: First Title found"
    when
        $titles: List() from collect(Entity(type == "title"))
    then
        componentCreationService.firstOrElse("StudyTitle.0.0", "Study_Title", $titles, "");
end
|
||||||
|
|
||||||
|
|
||||||
|
// Pairs a laboratory_name entity with a laboratory_country entity from the same
// containing node, provided no other laboratory_country in that node has a start
// offset closer to the name's start offset. The component value is "<name>, <country>".
rule "PerformingLaboratory.1.0: Performing Laboratory name and country found in same section"
    when
        $name: Entity(type == "laboratory_name", $sharedNode: containingNode)
        $country: Entity(type == "laboratory_country", containingNode == $sharedNode)
        not Entity(type == "laboratory_country", containingNode == $sharedNode, Math.abs($name.startOffset - startOffset) < Math.abs($name.startOffset - $country.startOffset))
    then
        componentCreationService.create(
            "PerformingLaboratory.1.0",
            "Performing_Laboratory",
            $name.getValue() + ", " + $country.getValue(),
            "Laboratory name and country found!",
            List.of($name, $country)
        );
end
|
||||||
|
|
||||||
|
// Handles a laboratory_name entity whose containing node holds no
// laboratory_country entity: the component value is the name alone.
rule "PerformingLaboratory.2.0: Performing Laboratory name but no country found in same section"
    when
        $name: Entity(type == "laboratory_name", $sharedNode: containingNode)
        not Entity(type == "laboratory_country", containingNode == $sharedNode)
    then
        componentCreationService.create(
            "PerformingLaboratory.2.0",
            "Performing_Laboratory",
            $name.getValue(),
            "Only laboratory name found!",
            List.of($name)
        );
end
|
||||||
|
|
||||||
|
// Fallback at salience -1: while no Performing_Laboratory component exists,
// create one with an empty value.
rule "PerformingLaboratory.0.2: Performing Laboratory not found"
    salience -1
    when
        not Component(name == "Performing_Laboratory")
    then
        componentCreationService.create(
            "PerformingLaboratory.0.2",
            "Performing_Laboratory",
            "",
            "fallback"
        );
end
|
||||||
|
|
||||||
|
|
||||||
|
// Collects all "report_number" entities and lets firstOrElse fill Report_Number,
// with the empty string as the fallback value.
rule "ReportNumber.0.0: First Report number found"
    when
        $reportNumbers: List() from collect(Entity(type == "report_number"))
    then
        componentCreationService.firstOrElse("ReportNumber.0.0", "Report_Number", $reportNumbers, "");
end
|
||||||
|
|
||||||
|
|
||||||
|
// Fires when at least one "glp_study" entity exists and records GLP_Study = "Yes",
// with the collected entities passed as the last argument.
rule "GLPStudy.0.0: GLP Study found"
    when
        $glpEntities: List(!isEmpty) from collect(Entity(type == "glp_study"))
    then
        componentCreationService.create(
            "GLPStudy.0.0",
            "GLP_Study",
            "Yes",
            "Yes if present, No if not",
            $glpEntities
        );
end
|
||||||
|
|
||||||
|
// Fires when no "glp_study" entity exists and records GLP_Study = "No".
rule "GLPStudy.1.0: GLP Study not found"
    when
        not Entity(type == "glp_study")
    then
        componentCreationService.create(
            "GLPStudy.1.0",
            "GLP_Study",
            "No",
            "Yes if present, No if not"
        );
end
|
||||||
|
|
||||||
|
// Looks up the description for an OECD guideline number/year pair in the
// "GuidelineMapping" component mapping and stores it as Test_Guidelines_1.
// Runs at salience 1 and only while no Test_Guidelines_1 component exists.
// When the mapping yields no description, the rule creates nothing.
rule "TestGuideline.0.1: match OECD number and year with guideline mappings"
    salience 1
    when
        not Component(name == "Test_Guidelines_1")
        $guidelineNumber: Entity(type == "oecd_guideline_number", $number: value)
        $guidelineYear: Entity(type == "oecd_guideline_year", $year: value)
    then
        Optional<String> guidelineMatch = componentMappingService.from("GuidelineMapping").where("number = " + $number).where("year = " + $year).select("description").findAny();
        if (guidelineMatch.isEmpty()) {
            return;
        }
        componentCreationService.create(
            // Reported identifier matches this rule's own name (previously "TestGuideline.0.0").
            "TestGuideline.0.1",
            "Test_Guidelines_1",
            guidelineMatch.get(),
            "OECD Number and guideline year mapped!",
            List.of($guidelineNumber, $guidelineYear)
        );
end
|
||||||
|
|
||||||
|
// Fallback for Test_Guidelines_1: while no such component exists, use the raw
// value of an "oecd_guideline" entity.
rule "TestGuideline.1.0: no guideline mapping found"
    when
        not Component(name == "Test_Guidelines_1")
        $guideLine: Entity(type == "oecd_guideline")
    then
        componentCreationService.create(
            // Reported identifier matches this rule's own name; the previous value
            // "TestGuideline.2.0" belongs to the EPA/EC guideline rule.
            "TestGuideline.1.0",
            "Test_Guidelines_1",
            $guideLine.getValue(),
            "No Mapping for OECD number and year found, using fallback instead!",
            List.of($guideLine)
        );
end
|
||||||
|
|
||||||
|
// Collects all "epa_guideline" and "ec_guideline" entities and joins them
// into Test_Guidelines_2.
rule "TestGuideline.2.0: All values of EPA guideline and EC guidelines"
    when
        $epaAndEcGuidelines: List() from collect(Entity(type == "epa_guideline" || type == "ec_guideline"))
    then
        componentCreationService.joining("TestGuideline.2.0", "Test_Guidelines_2", $epaAndEcGuidelines);
end
|
||||||
|
|
||||||
|
|
||||||
|
// Collects all "experimental_start_date" entities and passes them to
// convertDates for Experimental_Starting_Date.
rule "StartDate.0.0: All experimental start dates converted to dd/MM/yyyy"
    when
        $experimentStarts: List() from collect(Entity(type == "experimental_start_date"))
    then
        componentCreationService.convertDates("StartDate.0.0", "Experimental_Starting_Date", $experimentStarts);
end
|
||||||
|
|
||||||
|
|
||||||
|
// Collects all "experimental_end_date" entities and passes them to
// convertDates for Experimental_Completion_Date.
rule "CompletionDate.0.0: All experimental end dates converted to dd/MM/yyyy"
    when
        $experimentEnds: List() from collect(Entity(type == "experimental_end_date"))
    then
        componentCreationService.convertDates("CompletionDate.0.0", "Experimental_Completion_Date", $experimentEnds);
end
|
||||||
|
|
||||||
|
|
||||||
|
// Collects all "batch_number" entities and passes them to joiningUnique for
// Certificate_of_Analysis_Batch_Identification.
rule "AnalysisCertificate.0.0: Unique values of certificate of analysis batch identification"
    when
        $batches: List() from collect(Entity(type == "batch_number"))
    then
        componentCreationService.joiningUnique("AnalysisCertificate.0.0", "Certificate_of_Analysis_Batch_Identification", $batches);
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is one of the listed values.
// Passes all "study_conclusion" entities to joiningFromFirstSectionOnly with " ".
rule "StudyConclusion.0.0: Study conclusion in first found section"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $conclusionEntities: List() from collect(Entity(type == "study_conclusion"))
    then
        componentCreationService.joiningFromFirstSectionOnly("StudyConclusion.0.0", "Study_Conclusion", $conclusionEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is one of the listed values.
// Joins all "guideline_deviation" entities with "\n".
rule "GuidelineDeviation.0.0: Guideline deviation as sentences"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $deviations: List() from collect(Entity(type == "guideline_deviation"))
    then
        componentCreationService.joining("GuidelineDeviation.0.0", "Deviation_from_the_Guideline", $deviations, "\n");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is one of the listed values.
// Passes all "species" entities to firstOrElse with "" as the fallback.
rule "Species.0.0: First found species"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $speciesEntities: List() from collect(Entity(type == "species"))
    then
        componentCreationService.firstOrElse("Species.0.0", "Species", $speciesEntities, "");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is one of the listed values.
// Passes all "strain" entities to firstOrElse with "" as the fallback.
rule "Strain.0.0: First found strain"
    when
        $supportedOecd: String() from List.of("402", "403", "404", "405", "425", "429", "436", "471")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $strainEntities: List() from collect(Entity(type == "strain"))
    then
        componentCreationService.firstOrElse("Strain.0.0", "Strain", $strainEntities, "");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 402, 403, 425 or 436.
// Passes all "ld50_value" entities to joiningUnique.
rule "Conclusion.0.0: Unique values of Conclusion LD50"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $ld50Values: List() from collect(Entity(type == "ld50_value"))
    then
        componentCreationService.joiningUnique("Conclusion.0.0", "Conclusion_LD50_mg_per_kg", $ld50Values);
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 402, 403, 425 or 436 and at
// least one "ld50_greater" entity exists; records the fixed value "Greater than".
// The rule name now reads "Conclusion.1.0", matching the identifier reported in
// the consequence (it was written "Conclusion0.1.0").
rule "Conclusion.1.0: Greater than found"
    when
        $oecdNumber: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $conclusions: List(!isEmpty()) from collect (Entity(type == "ld50_greater"))
    then
        componentCreationService.create(
            "Conclusion.1.0",
            "Conclusion_LD50_Greater_than",
            "Greater than",
            "Entity of type 'ld50_greater' found",
            $conclusions
        );
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 402, 403, 425 or 436 and no
// "ld50_greater" entity exists; records an empty Conclusion_LD50_Greater_than.
rule "Conclusion.1.1: Greater than not found"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        not Entity(type == "ld50_greater")
    then
        componentCreationService.create(
            "Conclusion.1.1",
            "Conclusion_LD50_Greater_than",
            "",
            "No entity of type 'ld50_greater' found"
        );
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 402, 403, 425 or 436.
// Passes all "confidence_minimal" entities to joiningUnique.
rule "Conclusion.2.0: Minimum confidence as unique values"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $minimumConfidences: List() from collect(Entity(type == "confidence_minimal"))
    then
        componentCreationService.joiningUnique("Conclusion.2.0", "Conclusion_Minimum_Confidence", $minimumConfidences);
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 402, 403, 425 or 436.
// Passes all "confidence_maximal" entities to joiningUnique.
rule "Conclusion.3.0: Maximum confidence as unique values"
    when
        $supportedOecd: String() from List.of("402", "403", "425", "436")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $maximumConfidences: List() from collect(Entity(type == "confidence_maximal"))
    then
        componentCreationService.joiningUnique("Conclusion.3.0", "Conclusion_Maximum_Confidence", $maximumConfidences);
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "402".
// Passes all "necropsy_findings" entities to joiningFromLongestSectionOnly with " ".
rule "Necropsy.0.0: Necropsy findings from longest section"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $findings: List() from collect(Entity(type == "necropsy_findings"))
    then
        componentCreationService.joiningFromLongestSectionOnly("Necropsy.0.0", "Necropsy_Findings", $findings, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "403" or "436".
// Joins all "necropsy_findings" entities with "\n" into Necropsy_Findings.
rule "Necropsy.0.1: Necropsy findings joined with \n"
    when
        FileAttribute(label == "OECD Number", value == "403" || value == "436")
        $necropsies: List() from collect (Entity(type == "necropsy_findings"))
    then
        // Reported identifier matches this rule's own name; the previous value
        // "Necropsy.0.0" is the identifier of the OECD 402 rule.
        componentCreationService.joining("Necropsy.0.1", "Necropsy_Findings", $necropsies, "\n");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "402".
// Joins all "doses_(mg_kg_bw)" entities with " " into Doses_mg_per_kg_bw.
rule "Necropsy.1.0: Doses mg per kg of Bodyweight as one block"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $doseEntities: List() from collect(Entity(type == "doses_(mg_kg_bw)"))
    then
        componentCreationService.joining("Necropsy.1.0", "Doses_mg_per_kg_bw", $doseEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 403 or 436.
// Joins all "4h_exposure" entities with " " into Conducted_with_4_Hours_of_Exposure.
rule "Necropsy.2.0: Conducted with 4 hours of exposure as one block"
    when
        $oecdNumber: String() from List.of("403", "436")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $exposures: List() from collect (Entity(type == "4h_exposure"))
    then
        // Reported identifier matches this rule's own name (previously "Necropsy.3.0",
        // which no rule in this file carries).
        componentCreationService.joining("Necropsy.2.0", "Conducted_with_4_Hours_of_Exposure", $exposures, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is one of the listed values.
// Joins all "study_design" entities with " " into Study_Design.
rule "StudyDesign.0.0: Study design as one block"
    when
        $supportedOecd: String() from List.of("404", "405", "406", "428", "429", "438", "439", "474", "487")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $designEntities: List() from collect(Entity(type == "study_design"))
    then
        componentCreationService.joining("StudyDesign.0.0", "Study_Design", $designEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is one of the listed values.
// Joins all "results_and_conclusion" entities with " " into Results_and_Conclusions.
rule "Results.0.0: Results and conclusions as joined values"
    when
        $supportedOecd: String() from List.of("406", "428", "438", "439", "474", "487")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $resultEntities: List() from collect(Entity(type == "results_and_conclusion"))
    then
        componentCreationService.joining("Results.0.0", "Results_and_Conclusions", $resultEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "402".
// Joins all "weight_behavior_changes" entities with "\n" into Weight_Behavior_Changes.
rule "WeightBehavior.0.0: Weight change behavior as sentences"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $behaviorChanges: List() from collect(Entity(type == "weight_behavior_changes"))
    then
        componentCreationService.joining("WeightBehavior.0.0", "Weight_Behavior_Changes", $behaviorChanges, "\n");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "402".
// Joins all "mortality_statement" entities with " " into Mortality_Statement.
rule "MortalityStatement.0.0: Mortality statements as one block"
    when
        FileAttribute(label == "OECD Number", value == "402")
        $statements: List() from collect(Entity(type == "mortality_statement"))
    then
        componentCreationService.joining("MortalityStatement.0.0", "Mortality_Statement", $statements, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "403".
// Joins all "clinical_observations" entities with "\n" into Clinical_Observations.
rule "ClinicalObservations.0.0: Clinical observations as sentences"
    when
        FileAttribute(label == "OECD Number", value == "403")
        $observations: List() from collect (Entity(type == "clinical_observations"))
    then
        // Reported identifier matches this rule's own name; the previous value
        // "MortalityStatement.0.0" was copied from the mortality statement rule.
        componentCreationService.joining("ClinicalObservations.0.0", "Clinical_Observations", $observations, "\n");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "403".
// Joins all "bodyweight_changes" entities with "\n" into Body_Weight_Changes.
rule "BodyWeight.0.0: Bodyweight changes as sentences"
    when
        FileAttribute(label == "OECD Number", value == "403")
        $bodyweightEntities: List() from collect(Entity(type == "bodyweight_changes"))
    then
        componentCreationService.joining("BodyWeight.0.0", "Body_Weight_Changes", $bodyweightEntities, "\n");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 404 or 405.
// Joins all "detailing" entities with " " into Detailing_of_Reported_Changes.
rule "Detailing.0.0: Detailing of reported changes as one block"
    when
        $supportedOecd: String() from List.of("404", "405")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $detailEntities: List() from collect(Entity(type == "detailing"))
    then
        componentCreationService.joining("Detailing.0.0", "Detailing_of_Reported_Changes", $detailEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 405 or 429 and at least one
// "sex" entity has a lower-cased value of "male" or "males"; records Sex = "male".
rule "Sex.0.0: Male sex found"
    when
        $supportedOecd: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $maleEntities: List(!isEmpty) from collect(Entity(type == "sex", (value.toLowerCase() == "male" || value.toLowerCase() == "males")))
    then
        componentCreationService.create(
            "Sex.0.0",
            "Sex",
            "male",
            "male sex found",
            $maleEntities
        );
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 405 or 429 and at least one
// "sex" entity has a lower-cased value of "female" or "females"; records Sex = "female".
rule "Sex.1.0: Female sex found"
    when
        $oecdNumber: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $oecdNumber)
        $females: List(!isEmpty) from collect (Entity(type == "sex", (value.toLowerCase() == "female" || value.toLowerCase() == "females")))
    then
        componentCreationService.create(
            // Reported identifier matches this rule's own name; the previous value
            // "Sex.0.0" was copied from the male rule.
            "Sex.1.0",
            "Sex",
            "female",
            "female sex found",
            $females
        );
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 405 or 429.
// Creates a Number_of_Animals component from the value of a "number_of_animals" entity.
rule "NumberOfAnimals.0.0: Number of animals found"
    when
        $supportedOecd: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $animalCount: Entity(type == "number_of_animals")
    then
        componentCreationService.create(
            "NumberOfAnimals.0.0",
            "Number_of_Animals",
            $animalCount.getValue(),
            "Number of animals found directly",
            $animalCount
        );
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute is 405 or 429 and no
// "number_of_animals" entity exists. Passes all "animal_number" entities to
// uniqueValueCount for Number_of_Animals.
rule "NumberOfAnimals.1.0: Count unique occurences of animals"
    when
        $supportedOecd: String() from List.of("405", "429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        not Entity(type == "number_of_animals")
        $animalNumbers: List() from collect(Entity(type == "animal_number"))
    then
        componentCreationService.uniqueValueCount("NumberOfAnimals.1.0", "Number_of_Animals", $animalNumbers);
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "425".
// Joins all "clinical_signs" entities with "\n" into Clinical_Signs.
rule "ClinicalSigns.0.0: Clinical signs as sentences"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $signEntities: List() from collect(Entity(type == "clinical_signs"))
    then
        componentCreationService.joining("ClinicalSigns.0.0", "Clinical_Signs", $signEntities, "\n");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "425".
// Passes all "dose_mortality" and "dose_mortality_dose" entities to
// joiningFromSameTableRow for Dose_Mortality.
rule "DoseMortality.0.0: Dose mortality joined with dose from same table row"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $mortalityAndDoseEntities: List() from collect(Entity(type == "dose_mortality" || type == "dose_mortality_dose"))
    then
        componentCreationService.joiningFromSameTableRow("DoseMortality.0.0", "Dose_Mortality", $mortalityAndDoseEntities);
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "425".
// Joins all "mortality" entities with " " into Mortality.
rule "Mortality.0.0: Mortality as one block"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $mortalityEntities: List() from collect(Entity(type == "mortality"))
    then
        componentCreationService.joining("Mortality.0.0", "Mortality", $mortalityEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "425".
// Passes all "dosages" entities to firstOrElse with "" as the fallback.
rule "Dosages.0.0: First found value of Dosages"
    when
        $supportedOecd: String() from List.of("425")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $dosageEntities: List() from collect(Entity(type == "dosages"))
    then
        componentCreationService.firstOrElse("Dosages.0.0", "Dosages", $dosageEntities, "");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "429".
// Joins all "preliminary_test_results" entities with "\n" into Preliminary_Test_Results.
rule "PrelimResults.0.0: Preliminary test results as sentences"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $preliminaryResults: List() from collect(Entity(type == "preliminary_test_results"))
    then
        componentCreationService.joining("PrelimResults.0.0", "Preliminary_Test_Results", $preliminaryResults, "\n");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "429".
// Joins all "test_results" entities with " " into Test_Results.
rule "TestResults.0.0: Test results as one block"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $testResultEntities: List() from collect(Entity(type == "test_results"))
    then
        componentCreationService.joining("TestResults.0.0", "Test_Results", $testResultEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "429".
// Joins all "positive_control" entities with " " into
// Was_the_definitive_study_conducted_with_positive_control.
rule "PositiveControl.0.0: Was the definitive study conducted with positive control"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $controlEntities: List() from collect(Entity(type == "positive_control"))
    then
        componentCreationService.joining("PositiveControl.0.0", "Was_the_definitive_study_conducted_with_positive_control", $controlEntities, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "429".
// Joins all "results_(main_study)" entities with " " into Results_Main_Study.
rule "MainResults.0.0: Results from main study as one block"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $mainStudyResults: List() from collect(Entity(type == "results_(main_study)"))
    then
        componentCreationService.joining("MainResults.0.0", "Results_Main_Study", $mainStudyResults, " ");
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "429" and at least one
// "approach_used" entity exists; records What_was_the_approach_used = "Group".
rule "UsedApproach.0.0: Used approach found and mapped to 'Group'"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        $approachEntities: List(!isEmpty()) from collect(Entity(type == "approach_used"))
    then
        componentCreationService.create(
            "UsedApproach.0.0",
            "What_was_the_approach_used",
            "Group",
            "'Group' when approach used is present, else 'Individual'",
            $approachEntities
        );
end
|
||||||
|
|
||||||
|
// Applies when the "OECD Number" file attribute equals "429" and no
// "approach_used" entity exists; records What_was_the_approach_used = "Individual".
rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
    when
        $supportedOecd: String() from List.of("429")
        FileAttribute(label == "OECD Number", value == $supportedOecd)
        not Entity(type == "approach_used")
    then
        componentCreationService.create(
            "UsedApproach.1.0",
            "What_was_the_approach_used",
            "Individual",
            "'Group' when approach used is present, else 'Individual'"
        );
end
|
||||||
|
|
||||||
|
// Catch-all at salience -999: when no "OECD Number" file attribute exists and at
// least one Entity fact is present, pass all entities to
// createComponentsForUnMappedEntities.
rule "DefaultComponents.999.0: Create components for all unmapped entities."
    salience -999
    when
        not FileAttribute(label == "OECD Number")
        $everyEntity: List(!isEmpty()) from collect(Entity())
    then
        componentCreationService.createComponentsForUnMappedEntities("DefaultComponents.999.0", $everyEntity);
end
|
||||||
|
|
||||||
|
|
||||||
|
//------------------------------------ Component merging rules ------------------------------------
|
||||||
|
/*
|
||||||
|
rule "X.0.0: merge duplicate component references"
|
||||||
|
when
|
||||||
|
$first: Component()
|
||||||
|
$duplicate: Component(this != $first, name == $first.name, value == $first.value)
|
||||||
|
then
|
||||||
|
$first.getReferences().addAll($duplicate.getReferences());
|
||||||
|
retract($duplicate);
|
||||||
|
end
|
||||||
|
*/
|
||||||
@ -68,6 +68,15 @@ query "getFileAttributes"
|
|||||||
$fileAttribute: FileAttribute()
|
$fileAttribute: FileAttribute()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
//------------------------------------ T rules ------------------------------------
|
||||||
|
// For each Table node, asks entityCreationService for an entity of type "Table"
// covering that node and, when one is returned, applies rule T.0.0 to it.
rule "T.0.0: Create TableEntities for all Tables"
    when
        $tableNode: Table()
    then
        Optional<TableEntity> createdEntity = entityCreationService.bySemanticNode($tableNode, "Table", EntityType.ENTITY);
        createdEntity.ifPresent(entity -> entity.apply("T.0.0", "Table found.", "n-a"));
end
|
||||||
|
|
||||||
//------------------------------------ H rules ------------------------------------
|
//------------------------------------ H rules ------------------------------------
|
||||||
|
|
||||||
// Rule unit: H.0
|
// Rule unit: H.0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user