RED-10708: Tables as components in DM

This commit is contained in:
maverickstuder 2025-01-27 16:52:13 +01:00
parent 518c38c2e9
commit 723263a7c4
31 changed files with 572 additions and 205 deletions

View File

@ -10,6 +10,7 @@ import java.util.Optional;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.GenericSemanticNode;
@ -362,22 +363,25 @@ public class DocumentTree {
}
public void addEntityToGraph(TextEntity entity) {
public void addEntityToGraph(SemanticEntity entity) {
getRoot().getNode().addThisToEntityIfIntersects(entity);
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
EntityEnrichmentService.enrichEntity(entity, textBlock);
EntityCreationUtility.addToPages(entity);
EntityCreationUtility.addEntityToNodeEntitySets(entity);
if (entity.getEntityType().equals(EntityType.TEMPORARY)) {
return;
}
entity.computeRelations();
entity.notifyEntityInserted();
if (entity instanceof TextEntity textEntity) {
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
textEntity.computeRelations();
entity.notifyEntityInserted(); // todo: table entity currently causes loop?
}
EntityCreationUtility.addEntityToNodeEntitySets(entity);
}

View File

@ -188,7 +188,7 @@ public interface IEntity {
*
* @return A set of references.
*/
default Set<TextEntity> references() {
default Set<SemanticEntity> references() {
return getMatchedRule().getReferences();
}

View File

@ -40,7 +40,7 @@ public final class MatchedRule implements Comparable<MatchedRule> {
boolean ignored;
@Builder.Default
Set<TextEntity> references = Collections.emptySet();
Set<SemanticEntity> references = Collections.emptySet();
/**

View File

@ -0,0 +1,110 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Data;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
@Data
@FieldDefaults(level = AccessLevel.PROTECTED)
@SuperBuilder
public abstract class SemanticEntity implements IEntity {

    // Classification of this entity (e.g. ENTITY, HINT, TEMPORARY); fixed at build time.
    final EntityType entityType;

    String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted

    // Rules that matched this entity, kept in the MatchedRule natural (priority) order.
    @Builder.Default
    final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();

    // Manual user changes layered on top of the automatic result.
    @Builder.Default
    final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();

    // Pages this entity appears on; filled by EntityCreationUtility.addToPages.
    @Builder.Default
    Set<Page> pages = new HashSet<>();

    // Lazily computed per-page screen positions; may be null until first accessed.
    List<PositionOnPage> positionsOnPagePerPage;

    // Every node whose text range intersects this entity (see addThisToEntityIfIntersects).
    @Builder.Default
    List<SemanticNode> intersectingNodes = new LinkedList<>();

    // Smallest node that fully contains this entity's text range.
    SemanticNode deepestFullyContainingNode;

    // Relations (equality/containment/intersection) to other entities, keyed by the partner entity.
    @Builder.Default
    Map<SemanticEntity, Set<Relation>> relations = new HashMap<>();

    // Observers notified on insert/update events for this entity.
    @Builder.Default
    Collection<EntityEventListener> entityEventListeners = new ArrayList<>();

    /**
     * @return true when this entity is of EntityType ENTITY or HINT
     */
    public boolean validEntityType() {
        return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
    }

    /**
     * @return true when this entity is active (see {@link IEntity#active()}) and of a valid entity type
     */
    public boolean valid() {
        return active() && validEntityType();
    }

    /**
     * @param type type name to compare against the effective type (manual overwrite wins)
     * @return true when {@link #type()} equals the given type
     */
    public boolean isType(String type) {
        return type().equals(type);
    }

    /**
     * @param types candidate type names
     * @return true when the effective type is one of the given types
     */
    public boolean isAnyType(List<String> types) {
        return types.contains(type());
    }

    /**
     * @param manualRedactionId annotation id as produced per page position
     * @return true when any of this entity's per-page positions carries the given id
     */
    public boolean matchesAnnotationId(String manualRedactionId) {
        return getPositionsOnPagePerPage().stream()
                .anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
    }

    // Registers a node whose text range intersects this entity (graph insertion).
    public void addIntersectingNode(SemanticNode containingNode) {
        intersectingNodes.add(containingNode);
    }

    @Override
    public void addEntityEventListener(EntityEventListener listener) {
        entityEventListeners.add(listener);
    }

    @Override
    public void removeEntityEventListener(EntityEventListener listener) {
        entityEventListeners.remove(listener);
    }

    @Override
    public Collection<EntityEventListener> getEntityEventListeners() {
        return entityEventListeners;
    }
}

View File

@ -0,0 +1,111 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
 * Entity that redacts a whole {@link Table} node rather than a text range.
 * Positions, text range and value are all derived from the underlying table.
 */
@Data
@SuperBuilder
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
public class TableEntity extends SemanticEntity {

    // primary key, derived from the table's page positions, type and entity type
    @EqualsAndHashCode.Include
    final String id;

    // The table node this entity covers.
    Table table;

    /**
     * Factory for a freshly detected table entity, before graph insertion.
     *
     * @param table      table node the entity covers
     * @param type       semantic type assigned to the entity
     * @param entityType classification (ENTITY, HINT, ...)
     * @return a new TableEntity with its id derived from the table's positions
     */
    public static TableEntity initialEntityNode(Table table, String type, EntityType entityType) {
        return TableEntity.builder()
                .id(table.buildId(table.getTextRange(), type, entityType))
                .type(type)
                .entityType(entityType)
                .manualOverwrite(new ManualChangeOverwrite(entityType))
                .table(table)
                .build();
    }

    /** @return a display value of the form {@code Table:<headline>} */
    @Override
    public String getValue() {
        return "Table:" + table.getHeadline();
    }

    /** @return the text range of the whole table's text block */
    @Override
    public TextRange getTextRange() {
        return table.getTextBlock().getTextRange();
    }

    /** @return the manually overwritten type if present, otherwise {@code TABLE} */
    @Override
    public String type() {
        return getManualOverwrite().getType()
                .orElse(NodeType.TABLE.toString());
    }

    /**
     * Detaches this entity from every node, page and related entity and
     * resets the inherited graph fields to fresh empty collections.
     */
    public void removeFromGraph() {
        remove("FINAL.0.0", "removed completely");
        intersectingNodes.forEach(node -> node.getEntities().remove(this));
        pages.forEach(page -> page.getSemanticEntities().remove(this));
        intersectingNodes = new LinkedList<>();
        relations.keySet()
                .forEach(entity -> entity.getRelations().remove(this));
        // Use java.util.HashMap for consistency with SemanticEntity's builder default
        // (was commons-collections HashedMap, which added no benefit here).
        relations = new HashMap<>();
        deepestFullyContainingNode = null;
        pages = new HashSet<>();
    }

    /**
     * Lazily computes the per-page positions from the table's text block.
     * Recomputed while the cached list is null or empty.
     *
     * @return one {@link PositionOnPage} per page the table spans
     */
    public List<PositionOnPage> getPositionsOnPagePerPage() {
        if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
            Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = table.getTextBlock().getPositionsPerPage(table.getTextRange());
            positionsOnPagePerPage = rectanglesPerLinePerPage.entrySet()
                    .stream()
                    .map(entry -> new PositionOnPage(id, entry.getKey(), entry.getValue()))
                    .toList();
        }
        return positionsOnPagePerPage;
    }

    /** @return the covered table rendered as CSV (delegates to {@link Table#asCsv()}) */
    public String asCsv() {
        return table.asCsv();
    }
}

View File

@ -1,15 +1,11 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
@ -19,26 +15,24 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
/**
* Represents a text entity within a document, characterized by its text range, type, entity type,
* and associated metadata like matched rules, pages, and engines.
*/
@Data
@Builder
@AllArgsConstructor
@SuperBuilder
@FieldDefaults(level = AccessLevel.PRIVATE)
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
public class TextEntity implements IEntity {
public class TextEntity extends SemanticEntity {
// primary key
@EqualsAndHashCode.Include
@ -48,13 +42,6 @@ public class TextEntity implements IEntity {
TextRange textRange;
@Builder.Default
Set<TextRange> duplicateTextRanges = new HashSet<>();
String type; // TODO: make final once ManualChangesApplicationService::recategorize is deleted
final EntityType entityType;
@Builder.Default
final PriorityQueue<MatchedRule> matchedRuleList = new PriorityQueue<>();
@Builder.Default
final ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
boolean dictionaryEntry;
boolean dossierDictionaryEntry;
@ -66,24 +53,12 @@ public class TextEntity implements IEntity {
String value;
String textBefore;
String textAfter;
@Builder.Default
Set<Page> pages = new HashSet<>();
List<PositionOnPage> positionsOnPagePerPage;
@Builder.Default
List<SemanticNode> intersectingNodes = new LinkedList<>();
SemanticNode deepestFullyContainingNode;
@Builder.Default
Map<TextEntity, Set<Relation>> relations = new HashMap<>();
@Builder.Default
Collection<EntityEventListener> entityEventListeners = new ArrayList<>();
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
return TextEntity.builder()
.id(buildId(node, textRange, type, entityType))
.id(node.buildId(textRange, type, entityType))
.type(type)
.entityType(entityType)
.textRange(textRange)
@ -110,19 +85,6 @@ public class TextEntity implements IEntity {
}
private static String buildId(SemanticNode node, TextRange textRange, String type, EntityType entityType) {
Map<Page, List<Rectangle2D>> rectanglesPerLinePerPage = node.getPositionsPerPage(textRange);
return IdBuilder.buildId(rectanglesPerLinePerPage.keySet(),
rectanglesPerLinePerPage.values()
.stream()
.flatMap(Collection::stream)
.toList(),
type,
entityType.name());
}
public void addTextRange(TextRange textRange) {
duplicateTextRanges.add(textRange);
@ -143,24 +105,6 @@ public class TextEntity implements IEntity {
}
public boolean isType(String type) {
return type().equals(type);
}
public boolean isAnyType(List<String> types) {
return types.contains(type());
}
public void addIntersectingNode(SemanticNode containingNode) {
intersectingNodes.add(containingNode);
}
public String getValueWithLineBreaks() {
return getDeepestFullyContainingNode().getTextBlock().subSequenceWithLineBreaks(getTextRange());
@ -171,7 +115,7 @@ public class TextEntity implements IEntity {
remove("FINAL.0.0", "removed completely");
intersectingNodes.forEach(node -> node.getEntities().remove(this));
pages.forEach(page -> page.getEntities().remove(this));
pages.forEach(page -> page.getSemanticEntities().remove(this));
intersectingNodes = new LinkedList<>();
relations.keySet()
.forEach(entity -> entity.getRelations().remove(this));
@ -215,6 +159,7 @@ public class TextEntity implements IEntity {
return textEntity.contains(this);
}
public boolean contains(TextEntity textEntity) {
if (this.textRange.contains(textEntity.getTextRange())) {
@ -239,7 +184,6 @@ public class TextEntity implements IEntity {
}
public boolean intersects(TextEntity textEntity) {
return this.textRange.intersects(textEntity.getTextRange()) //
@ -277,14 +221,6 @@ public class TextEntity implements IEntity {
notifyEntityUpdated();
}
public boolean matchesAnnotationId(String manualRedactionId) {
return getPositionsOnPagePerPage().stream()
.anyMatch(entityPosition -> entityPosition.getId().equals(manualRedactionId));
}
@Override
public String toString() {
@ -316,21 +252,6 @@ public class TextEntity implements IEntity {
}
/**
* @return true when this entity is of EntityType ENTITY or HINT
*/
public boolean validEntityType() {
return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT);
}
public boolean valid() {
return active() && validEntityType();
}
@Override
public String value() {
@ -339,41 +260,32 @@ public class TextEntity implements IEntity {
}
@Override
public void addEntityEventListener(EntityEventListener listener) {
entityEventListeners.add(listener);
}
@Override
public void removeEntityEventListener(EntityEventListener listener) {
entityEventListeners.remove(listener);
}
public void computeRelations() {
for (TextEntity textEntity : this.getDeepestFullyContainingNode().getEntities()) {
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
if (textEntity.getTextRange().equals(this.getTextRange())) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
} else if (textEntity.containedBy(this)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
} else if (this.containedBy(textEntity)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
} else {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
}
this.getDeepestFullyContainingNode().getEntities()
.stream()
.filter(semanticEntity -> semanticEntity instanceof TextEntity)
.map(semanticEntity -> (TextEntity) semanticEntity)
.forEach(textEntity -> {
if (this.intersects(textEntity) && !this.equals(textEntity) && !textEntity.getEntityType().equals(EntityType.TEMPORARY)) {
if (textEntity.getTextRange().equals(this.getTextRange())) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Equality(this, textEntity));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Equality(textEntity, this));
} else if (textEntity.containedBy(this)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Containment(this, textEntity));
} else if (this.containedBy(textEntity)) {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Containment(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
} else {
textEntity.getRelations().computeIfAbsent(this, k -> new HashSet<>()).add(new Intersection(textEntity, this));
this.getRelations().computeIfAbsent(textEntity, k -> new HashSet<>()).add(new Intersection(this, textEntity));
}
}
});
}
}
}
}

View File

@ -7,6 +7,7 @@ import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -38,7 +39,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
DocumentTree documentTree;
@Builder.Default
Set<TextEntity> entities = new HashSet<>();
Set<SemanticEntity> entities = new HashSet<>();
Map<Page, Rectangle2D> bBoxCache;
@ -61,7 +62,7 @@ public abstract class AbstractSemanticNode implements GenericSemanticNode {
@Override
public Map<Page, Rectangle2D> getBBox() {
public Map<Page, Rectangle2D>getBBox() {
if (bBoxCache == null) {
bBoxCache = GenericSemanticNode.super.getBBox();

View File

@ -5,6 +5,7 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -42,7 +43,7 @@ public class Page {
Footer footer;
@Builder.Default
Set<TextEntity> entities = new HashSet<>();
Set<SemanticEntity> semanticEntities = new HashSet<>();
@Builder.Default
Set<Image> images = new HashSet<>();

View File

@ -1,6 +1,9 @@
package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -4,6 +4,7 @@ import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@ -17,12 +18,16 @@ import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.utils.RedactionSearchUtility;
@ -74,7 +79,25 @@ public interface SemanticNode {
*
* @return Set of all Entities associated with this Node
*/
Set<TextEntity> getEntities();
Set<SemanticEntity> getEntities();
/**
 * View of {@link #getEntities()} narrowed to {@link TextEntity} instances.
 *
 * @return a new set with every entity of this node that is a TextEntity
 */
default Set<TextEntity> getTextEntities() {
    return getEntities().stream()
            .filter(TextEntity.class::isInstance)
            .map(TextEntity.class::cast)
            .collect(Collectors.toSet());
}
/**
 * View of {@link #getEntities()} narrowed to {@link TableEntity} instances.
 *
 * @return a new set with every entity of this node that is a TableEntity
 */
default Set<TableEntity> getTableEntities() {
    return getEntities().stream()
            .filter(TableEntity.class::isInstance)
            .map(TableEntity.class::cast)
            .collect(Collectors.toSet());
}
/**
@ -85,9 +108,9 @@ public interface SemanticNode {
*/
default Stream<TextEntity> streamValidEntities() {
return getEntities().stream()
return getTextEntities().stream()
.filter(IEntity::active)
.filter(TextEntity::validEntityType);
.filter(SemanticEntity::validEntityType);
}
@ -638,18 +661,18 @@ public interface SemanticNode {
* This function is used during insertion of EntityNodes into the graph, it checks if the TextRange of the RedactionEntity intersects or even contains the RedactionEntity.
* It sets the fields accordingly and recursively calls this function on all its children.
*
* @param textEntity RedactionEntity, which is being inserted into the graph
* @param entity RedactionEntity, which is being inserted into the graph
*/
default void addThisToEntityIfIntersects(TextEntity textEntity) {
default void addThisToEntityIfIntersects(SemanticEntity entity) {
TextBlock textBlock = getTextBlock();
if (textBlock.getTextRange().intersects(textEntity.getTextRange())) {
if (textBlock.containsTextRange(textEntity.getTextRange())) {
textEntity.setDeepestFullyContainingNode(this);
if (textBlock.getTextRange().intersects(entity.getTextRange())) {
if (textBlock.containsTextRange(entity.getTextRange())) {
entity.setDeepestFullyContainingNode(this);
}
textEntity.addIntersectingNode(this);
getDocumentTree().findIntersectingChildNodes(getTreeId(), textEntity.getTextRange())
.forEach(node -> node.addThisToEntityIfIntersects(textEntity));
entity.addIntersectingNode(this);
getDocumentTree().findIntersectingChildNodes(getTreeId(), entity.getTextRange())
.forEach(node -> node.addThisToEntityIfIntersects(entity));
}
}
@ -838,4 +861,17 @@ public interface SemanticNode {
return pages.size() == 1 && pages.contains(page);
}
/**
 * Builds a stable entity id from this node's per-page positions for the
 * given text range, the semantic type and the entity type name.
 *
 * @param textRange  range the entity covers within this node
 * @param type       semantic type of the entity
 * @param entityType entity classification; its name goes into the id
 * @return the id produced by {@link IdBuilder#buildId}
 */
default String buildId(TextRange textRange, String type, EntityType entityType) {
    Map<Page, List<Rectangle2D>> positionsPerPage = getPositionsPerPage(textRange);
    List<Rectangle2D> allRectangles = positionsPerPage.values()
            .stream()
            .flatMap(Collection::stream)
            .toList();
    return IdBuilder.buildId(positionsPerPage.keySet(), allRectangles, type, entityType.name());
}
}

View File

@ -15,6 +15,7 @@ import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
@ -48,7 +49,7 @@ public class Table implements SemanticNode {
TextBlock textBlock;
@Builder.Default
Set<TextEntity> entities = new HashSet<>();
Set<SemanticEntity> entities = new HashSet<>();
Map<Page, Rectangle2D> bBoxCache;
@ -109,7 +110,7 @@ public class Table implements SemanticNode {
.toList();
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -128,7 +129,7 @@ public class Table implements SemanticNode {
.toList();
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -146,7 +147,7 @@ public class Table implements SemanticNode {
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
.anyMatch(types::contains))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -167,7 +168,7 @@ public class Table implements SemanticNode {
return entityTypes.containsAll(types);
})
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -185,7 +186,7 @@ public class Table implements SemanticNode {
.filter(rowNumber -> streamTextEntitiesInRow(rowNumber).map(TextEntity::type)
.noneMatch(types::contains))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.map(TableCell::getTextEntities)
.flatMap(Collection::stream);
}
@ -426,4 +427,30 @@ public class Table implements SemanticNode {
visitor.visit(this);
}
/**
 * Renders the whole table as CSV: one line per row, cells comma-separated,
 * a trailing newline after every row. Cells containing a comma or a quote
 * are wrapped in double quotes with embedded quotes doubled; embedded
 * newlines are flattened to spaces first.
 *
 * @return the table content in CSV form
 */
public String asCsv() {
    StringBuilder csv = new StringBuilder();
    for (int row = 0; row < numberOfRows; row++) {
        for (int col = 0; col < numberOfCols; col++) {
            if (col > 0) {
                csv.append(",");
            }
            csv.append(escapeCsvCell(getCell(row, col).getTextBlock().getSearchText()));
        }
        csv.append("\n");
    }
    return csv.toString();
}

// Flattens line breaks, trims, and quotes the cell when it contains a comma or quote.
private String escapeCsvCell(String rawText) {
    String cellText = rawText.replaceAll("\\r?\\n", " ").trim();
    if (cellText.contains(",") || cellText.contains("\"")) {
        return "\"" + cellText.replace("\"", "\"\"") + "\"";
    }
    return cellText;
}
}

View File

@ -5,6 +5,8 @@ import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.IntersectingNodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -50,15 +52,15 @@ public class EntityCreationUtility {
}
public void addToPages(TextEntity entity) {
public void addToPages(SemanticEntity entity) {
Set<Page> pages = entity.getDeepestFullyContainingNode().getPages(entity.getTextRange());
entity.getPages().addAll(pages);
pages.forEach(page -> page.getEntities().add(entity));
pages.forEach(page -> page.getSemanticEntities().add(entity));
}
public void addEntityToNodeEntitySets(TextEntity entity) {
public void addEntityToNodeEntitySets(SemanticEntity entity) {
entity.getIntersectingNodes()
.forEach(node -> node.getEntities().add(entity));

View File

@ -26,6 +26,9 @@ public class Component {
List<Entity> references;
@Builder.Default
ComponentFormat componentFormat = ComponentFormat.TEXT;
public boolean addReference(Entity entity) {

View File

@ -0,0 +1,8 @@
package com.iqser.red.service.redaction.v1.server.model.component;
/** Output format used when rendering a component's referenced entities. */
public enum ComponentFormat {
    TEXT,
    //OVERRIDE, //todo: do we need this? -- candidate third format, left commented until a concrete use case is confirmed
    CSV
}

View File

@ -36,6 +36,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityTyp
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
@ -70,11 +72,11 @@ public class EntityLogCreatorService {
ObservationRegistry observationRegistry;
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(TextEntity textEntity) {
private static boolean notFalsePositiveOrFalseRecommendationOrRemoval(SemanticEntity semanticEntity) {
return !(textEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|| textEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|| textEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
return !(semanticEntity.getEntityType().equals(EntityType.FALSE_POSITIVE) //
|| semanticEntity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION) //
|| semanticEntity.getEntityType().equals(EntityType.DICTIONARY_REMOVAL));
}
@ -175,7 +177,7 @@ public class EntityLogCreatorService {
List<EntityLogEntry> entries = new ArrayList<>();
List<TextEntity> textEntities = document.getEntities()
List<SemanticEntity> semanticEntities = document.getEntities()
.stream()
.filter(entity -> !entity.getValue().isEmpty())
.filter(EntityLogCreatorService::notFalsePositiveOrFalseRecommendationOrRemoval)
@ -190,7 +192,7 @@ public class EntityLogCreatorService {
.toList();
List<String> allIds = new ArrayList<>();
allIds.addAll(textEntities.stream()
allIds.addAll(semanticEntities.stream()
.flatMap(entity -> entity.getPositionsOnPagePerPage()
.stream()
.map(PositionOnPage::getId))
@ -204,7 +206,7 @@ public class EntityLogCreatorService {
Map<String, List<ManualChange>> manualChangesMap = getManualChangesByEntityLogIds(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), allIds);
textEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
semanticEntities.forEach(entityNode -> entries.addAll(toEntityLogEntries(entityNode, analysisNumber, manualChangesMap)));
images.forEach(imageNode -> entries.add(createEntityLogEntry(imageNode,
dossierTemplateId,
@ -219,19 +221,19 @@ public class EntityLogCreatorService {
}
public List<EntityLogEntry> toEntityLogEntries(TextEntity textEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
public List<EntityLogEntry> toEntityLogEntries(SemanticEntity semanticEntity, int analysisNumber, Map<String, List<ManualChange>> existingManualChangesMap) {
List<EntityLogEntry> entityLogEntries = new ArrayList<>();
// split entity into multiple entries if it occurs on multiple pages, since FE can't handle multi page entities
for (PositionOnPage positionOnPage : textEntity.getPositionsOnPagePerPage()) {
for (PositionOnPage positionOnPage : semanticEntity.getPositionsOnPagePerPage()) {
List<Position> rectanglesPerLine = positionOnPage.getRectanglePerLine()
.stream()
.map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber()))
.toList();
EntityLogEntry entityLogEntry = createEntityLogEntry(textEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
EntityLogEntry entityLogEntry = createEntityLogEntry(semanticEntity, analysisNumber, existingManualChangesMap.getOrDefault(positionOnPage.getId(), new ArrayList<>()));
// set the ID from the positions, since it might contain a "-" with the page number if the entity is split across multiple pages
entityLogEntry.setId(positionOnPage.getId());
@ -317,12 +319,24 @@ public class EntityLogCreatorService {
}
/**
 * Dispatches entity-log entry creation to the concrete-type overload.
 *
 * @param entity               text or table entity to log
 * @param analysisNumber       current analysis run number
 * @param existingManualChanges manual changes already recorded for the entry's id
 * @return the created log entry
 * @throws UnsupportedOperationException for any other SemanticEntity subtype
 */
private EntityLogEntry createEntityLogEntry(SemanticEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
    if (entity instanceof TextEntity textEntity) {
        return createEntityLogEntry(textEntity, analysisNumber, existingManualChanges);
    }
    if (entity instanceof TableEntity tableEntity) {
        return createEntityLogEntry(tableEntity, analysisNumber, existingManualChanges);
    }
    throw new UnsupportedOperationException("Wrong semantic entity type given for entity log entry creation!!");
}
private EntityLogEntry createEntityLogEntry(TextEntity entity, int analysisNumber, List<ManualChange> existingManualChanges) {
Set<String> referenceIds = new HashSet<>();
entity.references()
.stream()
.filter(TextEntity::active)
.filter(SemanticEntity::active)
.forEach(ref -> ref.getPositionsOnPagePerPage()
.forEach(pos -> referenceIds.add(pos.getId())));
@ -365,7 +379,42 @@ public class EntityLogCreatorService {
}
private int determinePageParagraphIndex(TextEntity textEntity, EntryType entryType) {
/**
 * Builds the entity log entry for a table entity.
 *
 * @param tableEntity The table entity to log.
 * @param analysisNumber The number of the current analysis run.
 * @param existingManualChanges Manual changes already recorded for this entity's position.
 * @return The created {@link EntityLogEntry}.
 */
private EntityLogEntry createEntityLogEntry(TableEntity tableEntity, int analysisNumber, List<ManualChange> existingManualChanges) {
    // Collect the position ids of all applied references so the log entry can link to them.
    Set<String> referenceIds = new HashSet<>();
    for (var reference : tableEntity.references()) {
        if (!reference.applied()) {
            continue;
        }
        for (var positionOnPage : reference.getPositionsOnPagePerPage()) {
            referenceIds.add(positionOnPage.getId());
        }
    }
    EntryType entryType = buildEntryType(tableEntity);
    List<ManualChange> localManualChanges = ManualChangeFactory.toLocalManualChangeList(tableEntity.getManualOverwrite().getManualChangeLog(), true, analysisNumber);
    return EntityLogEntry.builder()
        .reason(tableEntity.buildReason())
        .legalBasis(tableEntity.legalBasis())
        .value(tableEntity.getValue())
        .type(tableEntity.type())
        // A manually set section wins over the one derived from the containing node.
        .section(tableEntity.getManualOverwrite().getSection()
            .orElse(this.buildSectionString(tableEntity.getDeepestFullyContainingNode())))
        .containingNodeId(tableEntity.getDeepestFullyContainingNode().getTreeId())
        .closestHeadline(tableEntity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText())
        .matchedRule(tableEntity.getMatchedRule().getRuleIdentifier().toString())
        .startOffset(tableEntity.getTextRange().start())
        .endOffset(tableEntity.getTextRange().end())
        // .engines(getEngines(tableEntity.getEngines(), tableEntity.getManualOverwrite()))
        .reference(referenceIds)
        .manualChanges(ManualChangesUtils.mergeManualChanges(existingManualChanges, localManualChanges))
        .state(buildEntryState(tableEntity))
        .entryType(entryType)
        .paragraphPageIdx(determinePageParagraphIndex(tableEntity, entryType))
        .build();
}
private int determinePageParagraphIndex(SemanticEntity textEntity, EntryType entryType) {
int pageParagraphIdx = -1;
@ -414,7 +463,7 @@ public class EntityLogCreatorService {
public static EntryType buildEntryType(IEntity entity) {
if (entity instanceof TextEntity textEntity) {
if (entity instanceof SemanticEntity textEntity) {
return getEntryType(textEntity.getEntityType());
} else if (entity instanceof PrecursorEntity precursorEntity) {
if (precursorEntity.isRectangle()) {

View File

@ -18,8 +18,6 @@ import com.google.common.collect.Sets;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRecategorization;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.model.PrecursorEntity;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
@ -146,9 +144,9 @@ public class ManualChangesApplicationService {
Set<Page> newIntersectingPages = new HashSet<>(closestEntity.getPages());
Sets.difference(currentIntersectingPages, newIntersectingPages)
.forEach(removedPage -> removedPage.getEntities().remove(entityToBeResized));
.forEach(removedPage -> removedPage.getSemanticEntities().remove(entityToBeResized));
Sets.difference(newIntersectingPages, currentIntersectingPages)
.forEach(addedPage -> addedPage.getEntities().add(entityToBeResized));
.forEach(addedPage -> addedPage.getSemanticEntities().add(entityToBeResized));
entityToBeResized.setDeepestFullyContainingNode(closestEntity.getDeepestFullyContainingNode());
entityToBeResized.setIntersectingNodes(new ArrayList<>(newIntersectingNodes));

View File

@ -102,9 +102,9 @@ public class UnprocessedChangesService {
}
document.getEntities()
.forEach(textEntity -> {
.forEach(entity -> {
Set<String> processedIds = new HashSet<>();
for (var positionsOnPerPage : textEntity.getPositionsOnPagePerPage()) {
for (var positionsOnPerPage : entity.getPositionsOnPagePerPage()) {
if (processedIds.contains(positionsOnPerPage.getId())) {
continue;
}
@ -113,17 +113,18 @@ public class UnprocessedChangesService {
.stream()
.map(rectangle2D -> new Position(rectangle2D, positionsOnPerPage.getPage().getNumber()))
.collect(Collectors.toList());
unprocessedManualEntities.add(UnprocessedManualEntity.builder()
.annotationId(allAnnotationIds.stream()
.filter(textEntity::matchesAnnotationId)
.findFirst()
.orElse(""))
.textBefore(textEntity.getTextBefore())
.textAfter(textEntity.getTextAfter())
.section(textEntity.getManualOverwrite().getSection()
.orElse(textEntity.getDeepestFullyContainingNode().toString()))
.positions(positions)
.build());
UnprocessedManualEntity.UnprocessedManualEntityBuilder builder = UnprocessedManualEntity.builder()
.annotationId(allAnnotationIds.stream()
.filter(entity::matchesAnnotationId)
.findFirst()
.orElse(""))
.section(entity.getManualOverwrite().getSection()
.orElse(entity.getDeepestFullyContainingNode().toString()))
.positions(positions);
if (entity instanceof TextEntity textEntity) {
builder.textBefore(textEntity.getTextBefore()).textAfter(textEntity.getTextAfter());
}
unprocessedManualEntities.add(builder.build());
}
});

View File

@ -23,8 +23,11 @@ import java.util.stream.Stream;
import org.kie.api.runtime.KieSession;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.component.ComponentFormat;
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.RuleIdentifier;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
@ -98,6 +101,22 @@ public class ComponentCreationService {
.value(value)
.valueDescription(valueDescription)
.references(new LinkedList<>(references))
.componentFormat(ComponentFormat.TEXT)
.build());
}
/**
 * Creates a component with an explicit {@link ComponentFormat} and inserts it into the
 * KIE session. The referenced entities are remembered in {@code referencedEntities} so
 * later fallback rules can skip entities that already belong to a component.
 *
 * @param ruleIdentifier Identifier of the rule that produced the component.
 * @param name The component's name.
 * @param value The component's value.
 * @param valueDescription A human-readable description of the value.
 * @param references The entities this component was derived from.
 * @param componentFormat The format of the component value (e.g. TEXT or CSV).
 */
public void create(String ruleIdentifier, String name, String value, String valueDescription, Collection<Entity> references, ComponentFormat componentFormat) {
    referencedEntities.addAll(references);
    Component component = Component.builder()
        .matchedRule(RuleIdentifier.fromString(ruleIdentifier))
        .name(name)
        .value(value)
        .valueDescription(valueDescription)
        .references(new LinkedList<>(references))
        .componentFormat(componentFormat)
        .build();
    kieSession.insert(component);
}
@ -376,6 +395,27 @@ public class ComponentCreationService {
}
/**
 * Creates one component for every table entity not yet referenced by another component.
 * If the entity's containing node is a whole {@link Table}, the table is exported as CSV;
 * if it is a single {@link TableCell}, the cell text is used; otherwise the entity's own
 * value is taken as-is.
 *
 * @param ruleIdentifier Identifier of the rule creating the components.
 * @param entities Candidate table entities.
 */
public void createComponentForTables(String ruleIdentifier, Collection<Entity> entities) {
    // Materialize the filtered, ordered candidates up front; the original stream's
    // sorted() step buffers fully anyway, so membership in referencedEntities is
    // evaluated before any create(...) call mutates it.
    List<Entity> pending = entities.stream()
        .filter(entity -> !referencedEntities.contains(entity))
        .sorted(EntityComparators.first())
        .toList();
    for (Entity entity : pending) {
        String value = entity.getValue();
        ComponentFormat format = ComponentFormat.TEXT;
        SemanticNode containingNode = entity.getContainingNode();
        if (containingNode instanceof TableCell cell) {
            // Tables with 0 rows and 0 columns are represented by a single cell.
            value = cell.getTextBlock().getSearchText();
        } else if (containingNode instanceof Table table) {
            value = table.asCsv();
            format = ComponentFormat.CSV;
        }
        create(ruleIdentifier, entity.getType(), value, "Table Entity", List.of(entity), format);
    }
}
/**
* Converts entity values to the 'dd/MM/yyyy' format and joins them with ', '. If the value could not be parsed as a date, it will be created as is.
*

View File

@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -906,6 +908,25 @@ public class EntityCreationService {
return byTextRange(textRange, type, entityType, node);
}
/**
 * Creates a {@link TableEntity} spanning the given document table, attaches the
 * KIE-session update listener to it and inserts it into the document graph.
 *
 * @param table The table to base the table entity on.
 * @param type The type of entity to create.
 * @param entityType The entity's classification.
 * @return The created {@link TableEntity}.
 */
public TableEntity bySemanticNode(Table table, String type, EntityType entityType) {
    TableEntity entity = TableEntity.initialEntityNode(table, type, entityType);
    addListenerToEntity(entity);
    table.getDocumentTree().addEntityToGraph(entity);
    return entity;
}
/**
* Expands a text entity's start boundary based on a regex pattern match.
@ -978,7 +999,8 @@ public class EntityCreationService {
if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) {
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
.stream()
.filter(e -> e.equals(entity) && e.type().equals(type))
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
.map(e -> (TextEntity)e)
.peek(e -> e.addEngines(engines))
.findAny();
if (optionalTextEntity.isEmpty()) {
@ -1419,7 +1441,7 @@ public class EntityCreationService {
.filter(e -> e.equals(entity))//
.filter(e -> !e.getTextRange().equals(entity.getTextRange()))//
.findAny()
.ifPresent(e -> addDuplicateEntityToGraph(e, entity.getTextRange(), node));
.ifPresent(e -> addDuplicateEntityToGraph((TextEntity) e, entity.getTextRange(), node));
} else {
addListenerToEntity(entity);
@ -1469,13 +1491,13 @@ public class EntityCreationService {
}
additionalIntersectingNode.getEntities().add(entityToDuplicate);
additionalIntersectingNode.getPages(newTextRange)
.forEach(page -> page.getEntities().add(entityToDuplicate));
.forEach(page -> page.getSemanticEntities().add(entityToDuplicate));
entityToDuplicate.addIntersectingNode(additionalIntersectingNode);
});
}
private void addListenerToEntity(TextEntity textEntity) {
private void addListenerToEntity(IEntity textEntity) {
if(kieSessionUpdater != null) {
textEntity.addEntityEventListener(kieSessionUpdater);

View File

@ -162,7 +162,7 @@ public class EntityFromPrecursorCreationService {
correctEntity.getIntersectingNodes()
.forEach(n -> n.getEntities().add(correctEntity));
correctEntity.getPages()
.forEach(page -> page.getEntities().add(correctEntity));
.forEach(page -> page.getSemanticEntities().add(correctEntity));
correctEntity.addMatchedRules(precursorEntity.getMatchedRuleList());
correctEntity.setDictionaryEntry(precursorEntity.isDictionaryEntry());

View File

@ -105,6 +105,11 @@ public class ComponentDroolsExecutionService {
})
.forEach(kieSession::insert);
// todo?
// document.getPages().stream().map(Page::getTableEntities).flatMap(Collection::stream)
// //.filter(this::isApplied)
// .forEach(kieSession::insert);
fileAttributes.stream()
.filter(f -> f.getValue() != null)
.forEach(kieSession::insert);

View File

@ -28,6 +28,7 @@ import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
@ -128,9 +129,9 @@ public class EntityDroolsExecutionService {
}
});
for (TextEntity textEntity : document.getEntities()) {
textEntity.addEntityEventListener(kieSessionUpdater);
textEntity.notifyEntityInserted();
for (SemanticEntity semanticEntity : document.getEntities()) {
semanticEntity.addEntityEventListener(kieSessionUpdater);
semanticEntity.notifyEntityInserted();
}
document.getPages()

View File

@ -9,6 +9,8 @@ import org.kie.api.runtime.rule.FactHandle;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityEventListener;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TableEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -50,14 +52,14 @@ public class KieSessionUpdater implements EntityEventListener {
private void handleOnEntityEvent(IEntity entity, Consumer<Object> consumer) {
if (entity instanceof TextEntity textEntity) {
updateIntersectingNodes(textEntity);
textEntity.getRelations().values()
if (entity instanceof SemanticEntity semanticEntity) {
updateIntersectingNodes(semanticEntity);
semanticEntity.getRelations().values()
.stream()
.flatMap(Collection::stream)
.forEach(consumer);
textEntity.getRelations().keySet()
.forEach(k -> k.getRelations().getOrDefault(textEntity, Collections.emptySet())
semanticEntity.getRelations().keySet()
.forEach(k -> k.getRelations().getOrDefault(semanticEntity, Collections.emptySet())
.forEach(consumer));
}
@ -71,9 +73,9 @@ public class KieSessionUpdater implements EntityEventListener {
}
private void updateIntersectingNodes(TextEntity textEntity) {
private void updateIntersectingNodes(SemanticEntity semanticEntity) {
textEntity.getIntersectingNodes()
semanticEntity.getIntersectingNodes()
.forEach(this::updateFactIfPresent);
}

View File

@ -37,6 +37,8 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, properties = {"application.type=DocuMine"})
public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@ -262,6 +264,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@Test
@SneakyThrows
public void testDoseMortalityExtraction() {
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
@ -269,9 +272,18 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
System.out.println("Finished structure analysis");
analyzeService.analyze(request);
AnalyzeResult analyze = analyzeService.analyze(request);
System.out.println("Finished analysis");
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
fileOutputStream.write(annotateResponse.getDocument());
}
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var doseMortality = componentLog.getComponentLogEntries()
.stream()

View File

@ -348,11 +348,11 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
.orElseThrow();
assertEquals(textEntity.getValue(), searchTerm);
assertTrue(pageNode.getEntities().contains(textEntity));
assertTrue(pageNode.getSemanticEntities().contains(textEntity));
assertTrue(document.getPages()
.stream()
.filter(page -> page != pageNode)
.noneMatch(page -> page.getEntities().contains(textEntity)));
.noneMatch(page -> page.getSemanticEntities().contains(textEntity)));
assertTrue(textEntity.getPages().contains(pageNode));
assertSameOffsetInAllIntersectingNodes(searchTerm, textEntity);
assertTrue(textEntity.getIntersectingNodes()

View File

@ -30,6 +30,7 @@ import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionarySearch;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
@ -200,14 +201,14 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
System.out.printf("%d Searches took %s s, average %.2f ms\n", numberOfRuns, ((float) totalSearchTime / 1000), totalSearchTime / numberOfRuns);
System.out.printf("%d Insertions took %s s, average %.2f ms\n", numberOfRuns, ((float) totalInsertTime / 1000), totalInsertTime / numberOfRuns);
System.out.printf("Found %d entities and saved %d\n", foundEntities.size(), document.getEntities().size());
for (TextEntity entity : document.getEntities()) {
for (TextEntity entity : document.getTextEntities()) {
var foundEntity = foundEntities.stream()
.filter(f -> f.getId().equals(entity.getId()))
.findFirst()
.get();
assertTrue(foundEntity.getTextRange().equals(entity.getTextRange()) || foundEntity.getDuplicateTextRanges().contains(entity.getTextRange()));
}
assert document.getEntities()
assert document.getTextEntities()
.stream()
.mapToInt(e -> e.getDuplicateTextRanges().size() + 1).sum() == foundEntities.size();
assert foundEntities.stream()
@ -225,10 +226,10 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
try (PDDocument pdDocument = Loader.loadPDF(fileResource.getFile())) {
for (Page page : document.getPages()) {
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
.stream()
.filter(entityNode -> !entityNode.removed())
.filter(TextEntity::applied)
.filter(SemanticEntity::applied)
.flatMap(entityNode -> entityNode.getPositionsOnPagePerPage()
.stream())
.filter(entityPosition -> entityPosition.getPage().equals(page))
@ -241,7 +242,7 @@ public class DocumentPerformanceIntegrationTest extends RulesIntegrationTest {
}
for (Page page : document.getPages()) {
List<Rectangle2D> entityPositionsOnPage = page.getEntities()
List<Rectangle2D> entityPositionsOnPage = page.getSemanticEntities()
.stream()
.filter(entityNode -> !entityNode.removed())
.filter(entityNode -> !entityNode.applied())

View File

@ -83,7 +83,7 @@ public class TableTest extends BuildDocumentIntegrationTest {
file);
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getEntities(), Color.MAGENTA);
var entityLayerGroup = EntityVisualizationUtility.createEntityLayerGroup(document.getTextEntities(), Color.MAGENTA);
viewerDocumentService.addLayerGroups(file, file, List.of(entityLayerGroup));
}

View File

@ -22,7 +22,7 @@ public class Cbi11Test extends RulesIntegrationTest {
doAnalysis(document, Collections.emptyList());
List<String> authorNames = document.getEntities()
List<String> authorNames = document.getTextEntities()
.stream()
.map(Dictionary::splitIntoAuthorNames)
.flatMap(Collection::stream)

View File

@ -9,6 +9,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.SemanticEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
@ -46,9 +47,9 @@ public class EntityVisualizationUtility {
private static List<ColoredRectangle> getEntityRectangles(Color color, Page page) {
return page.getEntities()
return page.getSemanticEntities()
.stream()
.map(TextEntity::getPositionsOnPagePerPage)
.map(SemanticEntity::getPositionsOnPagePerPage)
.flatMap(Collection::stream)
.filter(p -> p.getPage().equals(page))
.map(PositionOnPage::getRectanglePerLine)

View File

@ -1155,6 +1155,15 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)"
end
// Creates one TableEntity per table node so tables can participate in
// downstream component extraction (see rule "TableComponents.900.0").
rule "DOC.100.0: Create TableEntities for all Tables"
when
$table: Table()
then
// bySemanticNode builds the entity, registers the session listener and adds it to the document graph.
TableEntity tableEntity = entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY);
tableEntity.apply("DOC.100.0", "Table found.", "n-a");
end
//------------------------------------ AI rules ------------------------------------
// Rule unit: AI.4

View File

@ -461,6 +461,14 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
end
// Runs late (salience -900) so more specific component rules fire first; collects
// all remaining entities of type "Table" and turns each into a component
// (CSV for whole tables, text otherwise — see createComponentForTables).
rule "TableComponents.900.0: Create components for all table entities."
salience -900
when
$tables: List() from collect (Entity(type == "Table"))
then
componentCreationService.createComponentForTables("TableComponents.900.0", $tables);
end
rule "DefaultComponents.999.0: Create components for all unmapped entities."
salience -999
when