RED-10708: Tables as components in DM

This commit is contained in:
maverickstuder 2025-01-31 12:33:46 +01:00
parent 723263a7c4
commit 229d1e98ac
9 changed files with 63 additions and 65 deletions

View File

@ -369,18 +369,19 @@ public class DocumentTree {
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock(); TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
if (entity instanceof TextEntity textEntity) {
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
}
EntityCreationUtility.addToPages(entity); EntityCreationUtility.addToPages(entity);
EntityCreationUtility.addEntityToNodeEntitySets(entity);
if (entity.getEntityType().equals(EntityType.TEMPORARY)) { if (entity.getEntityType().equals(EntityType.TEMPORARY)) {
return; return;
} }
if (entity instanceof TextEntity textEntity) { entity.computeRelations();
EntityEnrichmentService.enrichEntity(textEntity, textBlock); entity.notifyEntityInserted(); // todo: table entity currently causes loop?
textEntity.computeRelations();
entity.notifyEntityInserted(); // todo: table entity currently causes loop?
}
EntityCreationUtility.addEntityToNodeEntitySets(entity);
} }

View File

@ -10,6 +10,8 @@ import java.util.Map;
import java.util.PriorityQueue; import java.util.PriorityQueue;
import java.util.Set; import java.util.Set;
import org.apache.commons.collections4.map.HashedMap;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
@ -56,6 +58,7 @@ public abstract class SemanticEntity implements IEntity {
} }
@Override
public boolean valid() { public boolean valid() {
return active() && validEntityType(); return active() && validEntityType();
@ -107,4 +110,21 @@ public abstract class SemanticEntity implements IEntity {
return entityEventListeners; return entityEventListeners;
} }
/**
 * Hook for computing this entity's relations; every concrete subclass must provide an implementation.
 * NOTE(review): presumably this populates the {@code relations} map of this entity — confirm against the implementations.
 */
public abstract void computeRelations();
/**
 * Detaches this entity from everything that references it and resets its own graph state.
 * <p>
 * In order: calls {@code remove("FINAL.0.0", "removed completely")}, removes this entity from the entity
 * collection of every intersecting node and from the semantic-entity collection of every page, removes it
 * from the relations of every entity it is related to, and finally replaces its own collections with empty
 * ones and sets {@code deepestFullyContainingNode} to {@code null}.
 * <p>
 * After this call {@code deepestFullyContainingNode} is {@code null}, so it must not be dereferenced.
 */
public void removeFromGraph() {
// Runs before any detaching, while the collections below are still populated.
// NOTE(review): presumably this marks the entity as removed with the given rule identifier and reason — confirm in remove(..).
remove("FINAL.0.0", "removed completely");
// Drop the back-references held by the nodes and pages this entity was registered on.
intersectingNodes.forEach(node -> node.getEntities().remove(this));
pages.forEach(page -> page.getSemanticEntities().remove(this));
// A fresh list is assigned instead of clearing the existing one in place.
intersectingNodes = new LinkedList<>();
// Remove this entity from the relations of each related entity before discarding the own map.
relations.keySet()
.forEach(entity -> entity.getRelations().remove(this));
relations = new HashedMap<>();
deepestFullyContainingNode = null;
pages = new HashSet<>();
}
} }

View File

@ -72,23 +72,7 @@ public class TableEntity extends SemanticEntity {
.orElse(NodeType.TABLE.toString()); .orElse(NodeType.TABLE.toString());
} }
@Override
public void removeFromGraph() {
remove("FINAL.0.0", "removed completely");
intersectingNodes.forEach(node -> node.getEntities().remove(this));
pages.forEach(page -> page.getSemanticEntities().remove(this));
intersectingNodes = new LinkedList<>();
relations.keySet()
.forEach(entity -> entity.getRelations().remove(this));
relations = new HashedMap<>();
deepestFullyContainingNode = null;
pages = new HashSet<>();
}
public List<PositionOnPage> getPositionsOnPagePerPage() { public List<PositionOnPage> getPositionsOnPagePerPage() {
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) { if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
@ -108,4 +92,11 @@ public class TableEntity extends SemanticEntity {
return table.asCsv(); return table.asCsv();
} }
/**
 * Intentionally does nothing: this class does not compute any relations yet.
 */
@Override
public void computeRelations() {
// No-op.
// Relation computation can be implemented here in the future.
}
} }

View File

@ -111,20 +111,7 @@ public class TextEntity extends SemanticEntity {
} }
public void removeFromGraph() { @Override
remove("FINAL.0.0", "removed completely");
intersectingNodes.forEach(node -> node.getEntities().remove(this));
pages.forEach(page -> page.getSemanticEntities().remove(this));
intersectingNodes = new LinkedList<>();
relations.keySet()
.forEach(entity -> entity.getRelations().remove(this));
relations = new HashedMap<>();
deepestFullyContainingNode = null;
pages = new HashSet<>();
}
public List<PositionOnPage> getPositionsOnPagePerPage() { public List<PositionOnPage> getPositionsOnPagePerPage() {
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) { if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {

View File

@ -20,12 +20,15 @@ public class EntityEnrichmentService {
public void enrichEntity(TextEntity entity, TextBlock textBlock) { public void enrichEntity(TextEntity entity, TextBlock textBlock) {
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString()); entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock)); entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock)); entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
} }
private String findTextAfter(int index, TextBlock textBlock) { private String findTextAfter(int index, TextBlock textBlock) {
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end()); int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
String textAfter = textBlock.subSequence(index, endOffset).toString(); String textAfter = textBlock.subSequence(index, endOffset).toString();
if (!textAfter.isBlank()) { if (!textAfter.isBlank()) {
@ -38,7 +41,9 @@ public class EntityEnrichmentService {
return ""; return "";
} }
private String findTextBefore(int index, TextBlock textBlock) { private String findTextBefore(int index, TextBlock textBlock) {
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start()); int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
String textBefore = textBlock.subSequence(offsetBefore, index).toString(); String textBefore = textBlock.subSequence(offsetBefore, index).toString();
if (!textBefore.isBlank()) { if (!textBefore.isBlank()) {
@ -51,13 +56,17 @@ public class EntityEnrichmentService {
return ""; return "";
} }
private List<String> splitToWordsAndRemoveEmptyWords(String text) { private List<String> splitToWordsAndRemoveEmptyWords(String text) {
return Arrays.stream(text.split(" ")) return Arrays.stream(text.split(" "))
.filter(word -> !Objects.equals("", word)) .filter(word -> !Objects.equals("", word))
.toList(); .toList();
} }
private String concatWordsBefore(List<String> words, boolean endWithSpace) { private String concatWordsBefore(List<String> words, boolean endWithSpace) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (String word : words) { for (String word : words) {
sb.append(word).append(" "); sb.append(word).append(" ");
@ -66,7 +75,9 @@ public class EntityEnrichmentService {
return endWithSpace ? result + " " : result; return endWithSpace ? result + " " : result;
} }
private String concatWordsAfter(List<String> words, boolean startWithSpace) { private String concatWordsAfter(List<String> words, boolean startWithSpace) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (String word : words) { for (String word : words) {
sb.append(word).append(" "); sb.append(word).append(" ");

View File

@ -3,6 +3,5 @@ package com.iqser.red.service.redaction.v1.server.model.component;
public enum ComponentFormat { public enum ComponentFormat {
TEXT, TEXT,
//OVERRIDE, //todo: do we need this?
CSV CSV
} }

View File

@ -908,23 +908,32 @@ public class EntityCreationService {
return byTextRange(textRange, type, entityType, node); return byTextRange(textRange, type, entityType, node);
} }
/** /**
* Creates a table entity based on the document table. * Creates a table entity based on the document table.
* *
* @param table The table to base the table entity on. * @param table The table to base the table entity on.
* @param type The type of entity to create. * @param type The type of entity to create.
* @param entityType The entity's classification. * @param entityType The entity's classification.
* @return The created {@link TableEntity}. * @return An optional containing the created {@link TableEntity}.
*/ */
public TableEntity bySemanticNode(Table table, String type, EntityType entityType) { public Optional<TableEntity> bySemanticNode(Table table, String type, EntityType entityType) {
TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType); TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType);
addListenerToEntity(tableEntity); Optional<TableEntity> optionalTableEntity = table.getDocumentTree().getRoot().getNode().getEntities()
.stream()
.filter(e -> e instanceof TableEntity && e.equals(tableEntity) && e.type().equals(type))
.map(e -> (TableEntity) e)
.findAny();
if (optionalTableEntity.isPresent()) {
return optionalTableEntity;
}
addListenerToEntity(tableEntity);
table.getDocumentTree().addEntityToGraph(tableEntity); table.getDocumentTree().addEntityToGraph(tableEntity);
return tableEntity; return Optional.of(tableEntity);
} }
@ -1000,7 +1009,7 @@ public class EntityCreationService {
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities() Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
.stream() .stream()
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type)) .filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
.map(e -> (TextEntity)e) .map(e -> (TextEntity) e)
.peek(e -> e.addEngines(engines)) .peek(e -> e.addEngines(engines))
.findAny(); .findAny();
if (optionalTextEntity.isEmpty()) { if (optionalTextEntity.isEmpty()) {
@ -1499,11 +1508,9 @@ public class EntityCreationService {
private void addListenerToEntity(IEntity textEntity) { private void addListenerToEntity(IEntity textEntity) {
if(kieSessionUpdater != null) { if (kieSessionUpdater != null) {
textEntity.addEntityEventListener(kieSessionUpdater); textEntity.addEntityEventListener(kieSessionUpdater);
} }
} }
} }

View File

@ -1155,15 +1155,6 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)"
end end
rule "DOC.100.0: Create TableEntities for all Tables"
when
$table: Table()
then
TableEntity tableEntity = entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY);
tableEntity.apply("DOC.100.0", "Table found.", "n-a");
end
//------------------------------------ AI rules ------------------------------------ //------------------------------------ AI rules ------------------------------------
// Rule unit: AI.4 // Rule unit: AI.4

View File

@ -460,15 +460,6 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'"); componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
end end
rule "TableComponents.900.0: Create components for all table entities."
salience -900
when
$tables: List() from collect (Entity(type == "Table"))
then
componentCreationService.createComponentForTables("TableComponents.900.0", $tables);
end
rule "DefaultComponents.999.0: Create components for all unmapped entities." rule "DefaultComponents.999.0: Create components for all unmapped entities."
salience -999 salience -999
when when