RED-10708: Tables as components in DM
This commit is contained in:
parent
723263a7c4
commit
229d1e98ac
@ -369,18 +369,19 @@ public class DocumentTree {
|
|||||||
|
|
||||||
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
|
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
|
||||||
|
|
||||||
|
if (entity instanceof TextEntity textEntity) {
|
||||||
|
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
|
||||||
|
}
|
||||||
|
|
||||||
EntityCreationUtility.addToPages(entity);
|
EntityCreationUtility.addToPages(entity);
|
||||||
|
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
||||||
|
|
||||||
if (entity.getEntityType().equals(EntityType.TEMPORARY)) {
|
if (entity.getEntityType().equals(EntityType.TEMPORARY)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (entity instanceof TextEntity textEntity) {
|
entity.computeRelations();
|
||||||
EntityEnrichmentService.enrichEntity(textEntity, textBlock);
|
entity.notifyEntityInserted(); // todo: table entity currently causes loop?
|
||||||
textEntity.computeRelations();
|
|
||||||
entity.notifyEntityInserted(); // todo: table entity currently causes loop?
|
|
||||||
}
|
|
||||||
EntityCreationUtility.addEntityToNodeEntitySets(entity);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -10,6 +10,8 @@ import java.util.Map;
|
|||||||
import java.util.PriorityQueue;
|
import java.util.PriorityQueue;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.collections4.map.HashedMap;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
|
|
||||||
@ -56,6 +58,7 @@ public abstract class SemanticEntity implements IEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
public boolean valid() {
|
public boolean valid() {
|
||||||
|
|
||||||
return active() && validEntityType();
|
return active() && validEntityType();
|
||||||
@ -107,4 +110,21 @@ public abstract class SemanticEntity implements IEntity {
|
|||||||
return entityEventListeners;
|
return entityEventListeners;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public abstract void computeRelations();
|
||||||
|
|
||||||
|
|
||||||
|
public void removeFromGraph() {
|
||||||
|
|
||||||
|
remove("FINAL.0.0", "removed completely");
|
||||||
|
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
||||||
|
pages.forEach(page -> page.getSemanticEntities().remove(this));
|
||||||
|
intersectingNodes = new LinkedList<>();
|
||||||
|
relations.keySet()
|
||||||
|
.forEach(entity -> entity.getRelations().remove(this));
|
||||||
|
relations = new HashedMap<>();
|
||||||
|
deepestFullyContainingNode = null;
|
||||||
|
pages = new HashSet<>();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -72,23 +72,7 @@ public class TableEntity extends SemanticEntity {
|
|||||||
.orElse(NodeType.TABLE.toString());
|
.orElse(NodeType.TABLE.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public void removeFromGraph() {
|
|
||||||
|
|
||||||
remove("FINAL.0.0", "removed completely");
|
|
||||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
|
||||||
pages.forEach(page -> page.getSemanticEntities().remove(this));
|
|
||||||
intersectingNodes = new LinkedList<>();
|
|
||||||
relations.keySet()
|
|
||||||
.forEach(entity -> entity.getRelations().remove(this));
|
|
||||||
relations = new HashedMap<>();
|
|
||||||
deepestFullyContainingNode = null;
|
|
||||||
pages = new HashSet<>();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||||
|
|
||||||
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||||
@ -108,4 +92,11 @@ public class TableEntity extends SemanticEntity {
|
|||||||
return table.asCsv();
|
return table.asCsv();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void computeRelations() {
|
||||||
|
// NO - OP
|
||||||
|
// can be implemented in the future
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -111,20 +111,7 @@ public class TextEntity extends SemanticEntity {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void removeFromGraph() {
|
@Override
|
||||||
|
|
||||||
remove("FINAL.0.0", "removed completely");
|
|
||||||
intersectingNodes.forEach(node -> node.getEntities().remove(this));
|
|
||||||
pages.forEach(page -> page.getSemanticEntities().remove(this));
|
|
||||||
intersectingNodes = new LinkedList<>();
|
|
||||||
relations.keySet()
|
|
||||||
.forEach(entity -> entity.getRelations().remove(this));
|
|
||||||
relations = new HashedMap<>();
|
|
||||||
deepestFullyContainingNode = null;
|
|
||||||
pages = new HashSet<>();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
public List<PositionOnPage> getPositionsOnPagePerPage() {
|
||||||
|
|
||||||
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
if (positionsOnPagePerPage == null || positionsOnPagePerPage.isEmpty()) {
|
||||||
|
|||||||
@ -20,12 +20,15 @@ public class EntityEnrichmentService {
|
|||||||
|
|
||||||
|
|
||||||
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
|
public void enrichEntity(TextEntity entity, TextBlock textBlock) {
|
||||||
|
|
||||||
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
|
entity.setValue(textBlock.subSequence(entity.getTextRange()).toString());
|
||||||
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
|
entity.setTextAfter(findTextAfter(entity.getTextRange().end(), textBlock));
|
||||||
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
|
entity.setTextBefore(findTextBefore(entity.getTextRange().start(), textBlock));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String findTextAfter(int index, TextBlock textBlock) {
|
private String findTextAfter(int index, TextBlock textBlock) {
|
||||||
|
|
||||||
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
|
int endOffset = Math.min(index + SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().end());
|
||||||
String textAfter = textBlock.subSequence(index, endOffset).toString();
|
String textAfter = textBlock.subSequence(index, endOffset).toString();
|
||||||
if (!textAfter.isBlank()) {
|
if (!textAfter.isBlank()) {
|
||||||
@ -38,7 +41,9 @@ public class EntityEnrichmentService {
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String findTextBefore(int index, TextBlock textBlock) {
|
private String findTextBefore(int index, TextBlock textBlock) {
|
||||||
|
|
||||||
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
|
int offsetBefore = Math.max(index - SURROUNDING_WORDS_OFFSET_WINDOW, textBlock.getTextRange().start());
|
||||||
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
|
String textBefore = textBlock.subSequence(offsetBefore, index).toString();
|
||||||
if (!textBefore.isBlank()) {
|
if (!textBefore.isBlank()) {
|
||||||
@ -51,13 +56,17 @@ public class EntityEnrichmentService {
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<String> splitToWordsAndRemoveEmptyWords(String text) {
|
private List<String> splitToWordsAndRemoveEmptyWords(String text) {
|
||||||
|
|
||||||
return Arrays.stream(text.split(" "))
|
return Arrays.stream(text.split(" "))
|
||||||
.filter(word -> !Objects.equals("", word))
|
.filter(word -> !Objects.equals("", word))
|
||||||
.toList();
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
|
private String concatWordsBefore(List<String> words, boolean endWithSpace) {
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (String word : words) {
|
for (String word : words) {
|
||||||
sb.append(word).append(" ");
|
sb.append(word).append(" ");
|
||||||
@ -66,7 +75,9 @@ public class EntityEnrichmentService {
|
|||||||
return endWithSpace ? result + " " : result;
|
return endWithSpace ? result + " " : result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
|
private String concatWordsAfter(List<String> words, boolean startWithSpace) {
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (String word : words) {
|
for (String word : words) {
|
||||||
sb.append(word).append(" ");
|
sb.append(word).append(" ");
|
||||||
|
|||||||
@ -3,6 +3,5 @@ package com.iqser.red.service.redaction.v1.server.model.component;
|
|||||||
public enum ComponentFormat {
|
public enum ComponentFormat {
|
||||||
|
|
||||||
TEXT,
|
TEXT,
|
||||||
//OVERRIDE, //todo: do we need this?
|
|
||||||
CSV
|
CSV
|
||||||
}
|
}
|
||||||
|
|||||||
@ -908,23 +908,32 @@ public class EntityCreationService {
|
|||||||
return byTextRange(textRange, type, entityType, node);
|
return byTextRange(textRange, type, entityType, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a table entity based on the document table.
|
* Creates a table entity based on the document table.
|
||||||
*
|
*
|
||||||
* @param table The table to base the table entity on.
|
* @param table The table to base the table entity on.
|
||||||
* @param type The type of entity to create.
|
* @param type The type of entity to create.
|
||||||
* @param entityType The entity's classification.
|
* @param entityType The entity's classification.
|
||||||
* @return The created {@link TableEntity}.
|
* @return An optional containing the created {@link TableEntity}.
|
||||||
*/
|
*/
|
||||||
public TableEntity bySemanticNode(Table table, String type, EntityType entityType) {
|
public Optional<TableEntity> bySemanticNode(Table table, String type, EntityType entityType) {
|
||||||
|
|
||||||
TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType);
|
TableEntity tableEntity = TableEntity.initialEntityNode(table, type, entityType);
|
||||||
|
|
||||||
addListenerToEntity(tableEntity);
|
Optional<TableEntity> optionalTableEntity = table.getDocumentTree().getRoot().getNode().getEntities()
|
||||||
|
.stream()
|
||||||
|
.filter(e -> e instanceof TableEntity && e.equals(tableEntity) && e.type().equals(type))
|
||||||
|
.map(e -> (TableEntity) e)
|
||||||
|
.findAny();
|
||||||
|
if (optionalTableEntity.isPresent()) {
|
||||||
|
return optionalTableEntity;
|
||||||
|
}
|
||||||
|
|
||||||
|
addListenerToEntity(tableEntity);
|
||||||
table.getDocumentTree().addEntityToGraph(tableEntity);
|
table.getDocumentTree().addEntityToGraph(tableEntity);
|
||||||
|
|
||||||
return tableEntity;
|
return Optional.of(tableEntity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1000,7 +1009,7 @@ public class EntityCreationService {
|
|||||||
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
|
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
|
.filter(e -> e instanceof TextEntity && e.equals(entity) && e.type().equals(type))
|
||||||
.map(e -> (TextEntity)e)
|
.map(e -> (TextEntity) e)
|
||||||
.peek(e -> e.addEngines(engines))
|
.peek(e -> e.addEngines(engines))
|
||||||
.findAny();
|
.findAny();
|
||||||
if (optionalTextEntity.isEmpty()) {
|
if (optionalTextEntity.isEmpty()) {
|
||||||
@ -1499,11 +1508,9 @@ public class EntityCreationService {
|
|||||||
|
|
||||||
private void addListenerToEntity(IEntity textEntity) {
|
private void addListenerToEntity(IEntity textEntity) {
|
||||||
|
|
||||||
if(kieSessionUpdater != null) {
|
if (kieSessionUpdater != null) {
|
||||||
textEntity.addEntityEventListener(kieSessionUpdater);
|
textEntity.addEntityEventListener(kieSessionUpdater);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1155,15 +1155,6 @@ rule "DOC.35.0: Doses (mg/kg bodyweight)"
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
rule "DOC.100.0: Create TableEntities for all Tables"
|
|
||||||
when
|
|
||||||
$table: Table()
|
|
||||||
then
|
|
||||||
TableEntity tableEntity = entityCreationService.bySemanticNode($table, "table", EntityType.ENTITY);
|
|
||||||
tableEntity.apply("DOC.100.0", "Table found.", "n-a");
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
//------------------------------------ AI rules ------------------------------------
|
//------------------------------------ AI rules ------------------------------------
|
||||||
|
|
||||||
// Rule unit: AI.4
|
// Rule unit: AI.4
|
||||||
|
|||||||
@ -460,15 +460,6 @@ rule "UsedApproach.1.0: Used approach not found and thus 'Individual'"
|
|||||||
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
|
componentCreationService.create("UsedApproach.1.0", "What_was_the_approach_used", "Individual", "'Group' when approach used is present, else 'Individual'");
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
rule "TableComponents.900.0: Create components for all table entities."
|
|
||||||
salience -900
|
|
||||||
when
|
|
||||||
$tables: List() from collect (Entity(type == "Table"))
|
|
||||||
then
|
|
||||||
componentCreationService.createComponentForTables("TableComponents.900.0", $tables);
|
|
||||||
end
|
|
||||||
|
|
||||||
rule "DefaultComponents.999.0: Create components for all unmapped entities."
|
rule "DefaultComponents.999.0: Create components for all unmapped entities."
|
||||||
salience -999
|
salience -999
|
||||||
when
|
when
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user