RED-10708: Tables as components in DM

This commit is contained in:
maverickstuder 2025-01-31 12:33:46 +01:00
parent 723263a7c4
commit 229d1e98ac
9 changed files with 63 additions and 65 deletions

View File

@ -369,18 +369,19 @@ public class DocumentTree {
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
if (entity instanceof TextEntity textEntity) {
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
}
EntityCreationUtility.addToPages(entity);
EntityCreationUtility.addEntityToNodeEntitySets(entity);
if (entity.getEntityType().equals(EntityType.TEMPORARY)) {
return;
}
if (entity instanceof TextEntity textEntity) {
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
textEntity.computeRelations();
entity.notifyEntityInserted(); // todo: table entity currently causes loop?
}
EntityCreationUtility.addEntityToNodeEntitySets(entity);
entity.computeRelations();
entity.notifyEntityInserted(); // todo: table entity currently causes loop?
}

View File

@ -10,6 +10,8 @@ import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -56,6 +58,7 @@ public abstract class SemanticEntity implements IEntity {
}
@Override
public boolean valid() {
return active() && validEntityType();
@ -107,4 +110,21 @@ public abstract class SemanticEntity implements IEntity {
return entityEventListeners;
}
/**
 * Computes the relations of this entity.
 * <p>
 * Declared abstract, so every concrete entity type has to supply its own implementation
 * (which may be a no-op).
 */
public abstract void computeRelations();
/**
 * Detaches this entity from the document graph and resets its graph-related state.
 * <p>
 * The entity is first removed via {@code remove("FINAL.0.0", "removed completely")}. It is then taken out of the
 * entity collections of all intersecting nodes and pages and out of the relation maps of all related entities.
 * Finally its own node, page and relation references are replaced by empty collections or {@code null}.
 */
public void removeFromGraph() {

    remove("FINAL.0.0", "removed completely");

    // Unregister from every node and page that still references this entity.
    for (var node : intersectingNodes) {
        node.getEntities().remove(this);
    }
    for (var page : pages) {
        page.getSemanticEntities().remove(this);
    }
    intersectingNodes = new LinkedList<>();

    // Drop the back-references held by related entities before clearing the own relation map.
    for (var relatedEntity : relations.keySet()) {
        relatedEntity.getRelations().remove(this);
    }
    relations = new HashedMap<>();

    deepestFullyContainingNode = null;
    pages = new HashSet<>();
}
}

View File

@ -72,23 +72,7 @@ public class TableEntity extends SemanticEntity {
.orElse(NodeType.TABLE.toString());
}
/**
 * Removes this table entity from the document graph.
 * <p>
 * After calling {@code remove("FINAL.0.0", "removed completely")}, the entity is deleted from the entity collections
 * of its intersecting nodes and pages and from the relation maps of its related entities. Its own references
 * (intersecting nodes, relations, deepest fully containing node, pages) are reset afterwards.
 */
public void removeFromGraph() {

    remove("FINAL.0.0", "removed completely");

    // Remove this entity from all nodes and pages that reference it.
    for (var node : intersectingNodes) {
        node.getEntities().remove(this);
    }
    for (var page : pages) {
        page.getSemanticEntities().remove(this);
    }
    intersectingNodes = new LinkedList<>();

    // Related entities must forget this entity before the own relation map is replaced.
    for (var relatedEntity : relations.keySet()) {
        relatedEntity.getRelations().remove(this);
    }
    relations = new HashedMap<>();

    deepestFullyContainingNode = null;
    pages = new HashSet<>();
}
@Override
public List<PositionOnPage> getPositionsOnPagePerPage() {
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
@ -108,4 +92,11 @@ public class TableEntity extends SemanticEntity {
return table.asCsv();
}
/**
 * Intentionally does nothing: no relations are computed for table entities at the moment.
 */
@Override
public void computeRelations() {
// NO-OP
// Relation computation for table entities can be implemented here in the future.
}
}

View File

@ -111,20 +111,7 @@ public class TextEntity extends SemanticEntity {
}
/**
 * Removes this text entity from the document graph.
 * <p>
 * After calling {@code remove("FINAL.0.0", "removed completely")}, the entity is deleted from the entity collections
 * of its intersecting nodes and pages and from the relation maps of its related entities. Its own references
 * (intersecting nodes, relations, deepest fully containing node, pages) are reset afterwards.
 */
public void removeFromGraph() {

    remove("FINAL.0.0", "removed completely");

    // Remove this entity from all nodes and pages that reference it.
    for (var node : intersectingNodes) {
        node.getEntities().remove(this);
    }
    for (var page : pages) {
        page.getSemanticEntities().remove(this);
    }
    intersectingNodes = new LinkedList<>();

    // Related entities must forget this entity before the own relation map is replaced.
    for (var relatedEntity : relations.keySet()) {
        relatedEntity.getRelations().remove(this);
    }
    relations = new HashedMap<>();

    deepestFullyContainingNode = null;
    pages = new HashSet<>();
}
@Override
public List<PositionOnPage> getPositionsOnPagePerPage() {
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {

View File

@ -20,12 +20,15 @@ public class EntityEnrichmentService {
/**
 * Fills in the value and the surrounding text of an entity from a text block.
 * <p>
 * The value is the part of {@code textBlock} covered by the entity's text range. The text after and before the
 * entity are determined by {@code findTextAfter} (starting at the range end) and {@code findTextBefore}
 * (ending at the range start).
 *
 * @param entity    the entity whose value, text-after and text-before are set
 * @param textBlock the text block the entity's text range refers to
 */
public void enrichEntity(TextEntity entity, TextBlock textBlock) {

    var textRange = entity.getTextRange();

    String value = textBlock.subSequence(textRange).toString();
    entity.setValue(value);

    String textAfter = findTextAfter(textRange.end(), textBlock);
    entity.setTextAfter(textAfter);

    String textBefore = findTextBefore(textRange.start(), textBlock);
    entity.setTextBefore(textBefore);
}
private String findTextAfter(int index, TextBlock textBlock) {
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
String textAfter = textBlock.subSequence(index, endOffset).toString();
if (!textAfter.isBlank()) {
@ -38,7 +41,9 @@ public class EntityEnrichmentService {
return "";
}
private String findTextBefore(int index, TextBlock textBlock) {
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
if (!textBefore.isBlank()) {
@ -51,13 +56,17 @@ public class EntityEnrichmentService {
return "";
}
/**
 * Splits the text at single space characters and drops the empty tokens that result from consecutive or
 * leading spaces.
 *
 * @param text the text to split
 * @return an unmodifiable list of the non-empty tokens in their original order
 */
private List<String> splitToWordsAndRemoveEmptyWords(String text) {

    String[] tokens = text.split(" ");
    return Arrays.stream(tokens)
            .filter(token -> !token.isEmpty())
            .toList();
}
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
StringBuilder sb = new StringBuilder();
for (String word : words) {
sb.append(word).append(" ");
@ -66,7 +75,9 @@ public class EntityEnrichmentService {
return endWithSpace ? result + " " : result;
}
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
StringBuilder sb = new StringBuilder();
for (String word : words) {
sb.append(word).append(" ");

View File

@ -3,6 +3,5 @@ package com.iqser.red.service.redaction.v1.server.model.component;
/**
 * Formats a component can be represented in.
 */
public enum ComponentFormat {
// presumably plain text content — TODO confirm against the component creation code
TEXT,
//OVERRIDE, //todo: do we need this?
// presumably comma-separated content (table entities expose their value via table.asCsv()) — TODO confirm
CSV
}

View File

@ -908,23 +908,32 @@ public class EntityCreationService {
return byTextRange(textRange, type, entityType, node);
}
/**
* Creates a table entity based on the document table.
*
* @param table The table to base the table entity on.
* @param table The table to base the table entity on.
* @param type The type of entity to create.
* @param entityType The entity's classification.
* @return The created {@link TableEntity}.
* @return An optional containing the created {@link TableEntity}.
*/
public TableEntity bySemanticNode(Table table, String type, EntityType entityType) {
public Optional<TableEntity> bySemanticNode(Table table, String type, EntityType entityType) {
TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType);
addListenerToEntity(tableEntity);
Optional<TableEntity> optionalTableEntity = table.getDocumentTree().getRoot().getNode().getEntities()
.stream()
.filter(e -> e instanceof TableEntity && e.equals(tableEntity) && e.type().equals(type))
.map(e -> (TableEntity) e)
.findAny();
if (optionalTableEntity.isPresent()) {
return optionalTableEntity;
}
addListenerToEntity(tableEntity);
table.getDocumentTree().addEntityToGraph(tableEntity);
return tableEntity;
return Optional.of(tableEntity);
}
@ -1000,7 +1009,7 @@ public class EntityCreationService {
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
.stream()
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
.map(e -> (TextEntity)e)
.map(e -> (TextEntity) e)
.peek(e -> e.addEngines(engines))
.findAny();
if (optionalTextEntity.isEmpty()) {
@ -1499,11 +1508,9 @@ public class EntityCreationService {
/**
 * Registers {@code kieSessionUpdater} as an entity event listener on the given entity.
 * <p>
 * Does nothing when no {@code kieSessionUpdater} is set.
 *
 * @param textEntity the entity the listener is added to
 */
private void addListenerToEntity(IEntity textEntity) {

    if (kieSessionUpdater != null) {
        textEntity.addEntityEventListener(kieSessionUpdater);
    }
}

View File

@ -1155,15 +1155,6 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)"
end
// For every Table fact: creates a table entity of type "table" with EntityType.ENTITY via the
// entityCreationService and applies rule identifier "DOC.100.0" with reason "Table found." to it.
// NOTE(review): bySemanticNode appears with both a TableEntity and an Optional<TableEntity> return type in
// EntityCreationService — confirm that the assignment below matches the signature actually in use.
rule "DOC.100.0: Create TableEntities for all Tables"
when
$table: Table()
then
TableEntity tableEntity = entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY);
tableEntity.apply("DOC.100.0", "Table found.", "n-a");
end
//------------------------------------ AI rules ------------------------------------
// Rule unit: AI.4

View File

@ -460,15 +460,6 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
end
// Collects all Entity facts whose type is "Table" into a single list and hands that list to
// componentCreationService.createComponentForTables.
// Salience -900 gives this rule a lower priority than rules with a higher salience, but a higher priority than
// "DefaultComponents.999.0" (salience -999).
// NOTE(review): the type is matched as "Table", whereas rule "DOC.100.0" passes "table" (lower case) when creating
// table entities — confirm that the casing is intended.
rule "TableComponents.900.0: Create components for all table entities."
salience -900
when
$tables: List() from collect (Entity(type == "Table"))
then
componentCreationService.createComponentForTables("TableComponents.900.0", $tables);
end
rule "DefaultComponents.999.0: Create components for all unmapped entities."
salience -999
when