RED-9221 - Fix component extraction in tables where rows and columns are not 1:1 #418
@ -4,10 +4,11 @@ plugins {
|
|||||||
}
|
}
|
||||||
|
|
||||||
description = "redaction-service-api-v1"
|
description = "redaction-service-api-v1"
|
||||||
|
val persistenceServiceVersion = "2.439.0"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.springframework:spring-web:6.0.12")
|
implementation("org.springframework:spring-web:6.0.12")
|
||||||
implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.411.0")
|
implementation("com.iqser.red.service:persistence-service-internal-api-v1:${persistenceServiceVersion}")
|
||||||
}
|
}
|
||||||
|
|
||||||
publishing {
|
publishing {
|
||||||
|
|||||||
@ -16,7 +16,7 @@ val layoutParserVersion = "0.131.0"
|
|||||||
val jacksonVersion = "2.15.2"
|
val jacksonVersion = "2.15.2"
|
||||||
val droolsVersion = "9.44.0.Final"
|
val droolsVersion = "9.44.0.Final"
|
||||||
val pdfBoxVersion = "3.0.0"
|
val pdfBoxVersion = "3.0.0"
|
||||||
val persistenceServiceVersion = "2.429.0"
|
val persistenceServiceVersion = "2.439.0"
|
||||||
val springBootStarterVersion = "3.1.5"
|
val springBootStarterVersion = "3.1.5"
|
||||||
val springCloudVersion = "4.0.4"
|
val springCloudVersion = "4.0.4"
|
||||||
val testContainersVersion = "1.19.7"
|
val testContainersVersion = "1.19.7"
|
||||||
|
|||||||
@ -68,7 +68,7 @@ public class Entity {
|
|||||||
Set<String> importedRedactionIntersections;
|
Set<String> importedRedactionIntersections;
|
||||||
|
|
||||||
|
|
||||||
public static Entity fromEntityLogEntry(EntityLogEntry e, Document document) {
|
public static Entity fromEntityLogEntry(EntityLogEntry e, Document document, int startOffset, int endOffset) {
|
||||||
|
|
||||||
return Entity.builder()
|
return Entity.builder()
|
||||||
.id(e.getId())
|
.id(e.getId())
|
||||||
@ -86,8 +86,8 @@ public class Entity {
|
|||||||
.containingNode(document.getDocumentTree().getEntryById(e.getContainingNodeId()).getNode())
|
.containingNode(document.getDocumentTree().getEntryById(e.getContainingNodeId()).getNode())
|
||||||
.textBefore(e.getTextBefore())
|
.textBefore(e.getTextBefore())
|
||||||
.textAfter(e.getTextAfter())
|
.textAfter(e.getTextAfter())
|
||||||
.startOffset(e.getStartOffset())
|
.startOffset(startOffset)
|
||||||
.endOffset(e.getEndOffset())
|
.endOffset(endOffset)
|
||||||
.length(Optional.ofNullable(e.getValue())
|
.length(Optional.ofNullable(e.getValue())
|
||||||
.orElse("").length())
|
.orElse("").length())
|
||||||
.imageHasTransparency(e.isImageHasTransparency())
|
.imageHasTransparency(e.isImageHasTransparency())
|
||||||
|
|||||||
@ -166,6 +166,42 @@ public class DocumentTree {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Optional<TableCell> findTableCellInTable(List<Integer> treeId, int start, int end) {
|
||||||
|
|
||||||
|
return findTableCellInTableRecursively(getEntryById(treeId).getChildren(), start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Optional<TableCell> findTableCellInTableRecursively(List<Entry> entries, int start, int end) {
|
||||||
|
|
||||||
|
int startIdx = findFirstIdxOfContainingChildBinarySearch(entries, start);
|
||||||
|
if (startIdx < 0) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
Entry entry = entries.get(startIdx);
|
||||||
|
|
||||||
|
if (entry.getNode().getTextRange().contains(end) && entry.getNode() instanceof TableCell tableCell) {
|
||||||
|
if (!entry.getNode().isLeaf()) {
|
||||||
|
Optional<TableCell> foundInChildren = findTableCellInTableRecursively(entry.getChildren(), start, end);
|
||||||
|
if (foundInChildren.isPresent()) {
|
||||||
|
return foundInChildren;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Optional.of(tableCell);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!entry.getNode().isLeaf()) {
|
||||||
|
Optional<TableCell> foundInChildren = findTableCellInTableRecursively(entry.getChildren(), start, end);
|
||||||
|
if (foundInChildren.isPresent()) {
|
||||||
|
return foundInChildren;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private int findFirstIdxOfContainingChildBinarySearch(List<Entry> childNodes, int start) {
|
private int findFirstIdxOfContainingChildBinarySearch(List<Entry> childNodes, int start) {
|
||||||
|
|
||||||
int low = 0;
|
int low = 0;
|
||||||
|
|||||||
@ -12,6 +12,7 @@ import java.util.stream.Collectors;
|
|||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.DuplicatedTextRange;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogChanges;
|
||||||
@ -286,6 +287,10 @@ public class EntityLogCreatorService {
|
|||||||
.textBefore(entity.getTextBefore())
|
.textBefore(entity.getTextBefore())
|
||||||
.startOffset(entity.getTextRange().start())
|
.startOffset(entity.getTextRange().start())
|
||||||
.endOffset(entity.getTextRange().end())
|
.endOffset(entity.getTextRange().end())
|
||||||
|
.duplicatedTextRanges(entity.getDuplicateTextRanges()
|
||||||
|
.stream()
|
||||||
|
.map(textRange -> DuplicatedTextRange.builder().start(textRange.start()).end(textRange.end()).build())
|
||||||
|
.toList())
|
||||||
.dossierDictionaryEntry(entity.isDossierDictionaryEntry())
|
.dossierDictionaryEntry(entity.isDossierDictionaryEntry())
|
||||||
.engines(getEngines(entity.getEngines(), entity.getManualOverwrite()))
|
.engines(getEngines(entity.getEngines(), entity.getManualOverwrite()))
|
||||||
//imported is no longer used, frontend should check engines
|
//imported is no longer used, frontend should check engines
|
||||||
|
|||||||
@ -31,7 +31,7 @@ public class ComponentLogCreatorService {
|
|||||||
});
|
});
|
||||||
List<ComponentLogEntry> componentLogComponents = map.entrySet()
|
List<ComponentLogEntry> componentLogComponents = map.entrySet()
|
||||||
.stream()
|
.stream()
|
||||||
.map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue()))
|
.map(entry -> new ComponentLogEntry(entry.getKey(), entry.getValue(), false))
|
||||||
.toList();
|
.toList();
|
||||||
return new ComponentLog(analysisNumber, componentRulesVersion, componentLogComponents);
|
return new ComponentLog(analysisNumber, componentRulesVersion, componentLogComponents);
|
||||||
}
|
}
|
||||||
@ -41,7 +41,6 @@ public class ComponentLogCreatorService {
|
|||||||
|
|
||||||
return ComponentLogEntryValue.builder()
|
return ComponentLogEntryValue.builder()
|
||||||
.value(component.getValue())
|
.value(component.getValue())
|
||||||
.originalValue(component.getValue())
|
|
||||||
.componentRuleId(component.getMatchedRule().toString())
|
.componentRuleId(component.getMatchedRule().toString())
|
||||||
.valueDescription(component.getValueDescription())
|
.valueDescription(component.getValueDescription())
|
||||||
.componentLogEntityReferences(toComponentEntityReferences(component.getReferences()
|
.componentLogEntityReferences(toComponentEntityReferences(component.getReferences()
|
||||||
|
|||||||
@ -1,5 +1,11 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.service.document;
|
package com.iqser.red.service.redaction.v1.server.service.document;
|
||||||
|
|
||||||
|
import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.findEntitiesFromFirstSection;
|
||||||
|
import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.findEntitiesFromLongestSection;
|
||||||
|
import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.getFirstTableCell;
|
||||||
|
import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.joinEntitiesOnSameRow;
|
||||||
|
import static com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils.joinTypes;
|
||||||
|
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
@ -9,7 +15,6 @@ import java.util.HashSet;
|
|||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@ -19,10 +24,9 @@ import org.kie.api.runtime.KieSession;
|
|||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
import com.iqser.red.service.redaction.v1.server.model.component.Component;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
|
import com.iqser.red.service.redaction.v1.server.model.drools.RuleIdentifier;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.utils.ComponentCreationUtils;
|
||||||
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
import com.iqser.red.service.redaction.v1.server.utils.DateConverter;
|
||||||
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
@ -37,24 +41,6 @@ public class ComponentCreationService {
|
|||||||
Set<Entity> referencedEntities = new HashSet<>();
|
Set<Entity> referencedEntities = new HashSet<>();
|
||||||
|
|
||||||
|
|
||||||
private static List<Entity> findEntitiesFromLongestSection(Collection<Entity> entities) {
|
|
||||||
|
|
||||||
var entitiesBySection = entities.stream()
|
|
||||||
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
|
||||||
Optional<SemanticNode> longestSection = entitiesBySection.entrySet()
|
|
||||||
.stream()
|
|
||||||
.sorted(Comparator.comparingInt(ComponentCreationService::getTotalLengthOfEntities).reversed())
|
|
||||||
.map(Map.Entry::getKey)
|
|
||||||
.findFirst();
|
|
||||||
|
|
||||||
if (longestSection.isEmpty()) {
|
|
||||||
return Collections.emptyList();
|
|
||||||
}
|
|
||||||
|
|
||||||
return entitiesBySection.get(longestSection.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Joins entity values, and creates a component from the result.
|
* Joins entity values, and creates a component from the result.
|
||||||
*
|
*
|
||||||
@ -87,15 +73,6 @@ public class ComponentCreationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static String joinTypes(Collection<Entity> entities) {
|
|
||||||
|
|
||||||
return entities.stream()
|
|
||||||
.map(Entity::getType)
|
|
||||||
.distinct()
|
|
||||||
.collect(Collectors.joining(", "));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new component with the given parameters and inserts it into the kieSession.
|
* Creates a new component with the given parameters and inserts it into the kieSession.
|
||||||
*
|
*
|
||||||
@ -146,20 +123,6 @@ public class ComponentCreationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static List<Entity> findEntitiesFromFirstSection(Collection<Entity> entities) {
|
|
||||||
|
|
||||||
var entitiesBySection = entities.stream()
|
|
||||||
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
|
||||||
Optional<SemanticNode> firstSection = entitiesBySection.keySet()
|
|
||||||
.stream()
|
|
||||||
.min(SemanticNodeComparators.first());
|
|
||||||
if (firstSection.isEmpty()) {
|
|
||||||
return Collections.emptyList();
|
|
||||||
}
|
|
||||||
return entitiesBySection.get(firstSection.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Joins unique entity values from the first section entities appear in, and creates a component from the result.
|
* Joins unique entity values from the first section entities appear in, and creates a component from the result.
|
||||||
*
|
*
|
||||||
@ -252,14 +215,6 @@ public class ComponentCreationService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static int getTotalLengthOfEntities(Map.Entry<SemanticNode, List<Entity>> entry) {
|
|
||||||
|
|
||||||
return entry.getValue()
|
|
||||||
.stream()
|
|
||||||
.mapToInt(Entity::getLength).sum();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Joins unique entity values with delimiter ', ' from the section with the longest combined entity values only, and creates a component from the result.
|
* Joins unique entity values with delimiter ', ' from the section with the longest combined entity values only, and creates a component from the result.
|
||||||
*
|
*
|
||||||
@ -329,7 +284,7 @@ public class ComponentCreationService {
|
|||||||
public void rowValueCount(String ruleIdentifier, String name, Collection<Entity> entities) {
|
public void rowValueCount(String ruleIdentifier, String name, Collection<Entity> entities) {
|
||||||
|
|
||||||
entities.stream()
|
entities.stream()
|
||||||
.collect(Collectors.groupingBy(this::getFirstTable))
|
.collect(Collectors.groupingBy(ComponentCreationUtils::getFirstTable))
|
||||||
.forEach((optionalTable, groupedEntities) -> {
|
.forEach((optionalTable, groupedEntities) -> {
|
||||||
|
|
||||||
if (optionalTable.isEmpty()) {
|
if (optionalTable.isEmpty()) {
|
||||||
@ -491,60 +446,21 @@ public class ComponentCreationService {
|
|||||||
.sorted(Comparator.reverseOrder())
|
.sorted(Comparator.reverseOrder())
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.joining(", "));
|
.collect(Collectors.joining(", "));
|
||||||
|
|
||||||
String valueDescription = String.format("Combine values of %s that are in same table row", types);
|
String valueDescription = String.format("Combine values of %s that are in same table row", types);
|
||||||
|
|
||||||
entities.stream()
|
entities.stream()
|
||||||
.collect(Collectors.groupingBy(this::getFirstTable))
|
.collect(Collectors.groupingBy(ComponentCreationUtils::getFirstTable))
|
||||||
.forEach((optionalTable, groupedEntities) -> {
|
.forEach((optionalTable, groupedEntities) -> {
|
||||||
if (optionalTable.isEmpty()) {
|
if (optionalTable.isEmpty()) {
|
||||||
groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
|
groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
joinEntitiesOnSameRow(ruleIdentifier, name, groupedEntities, valueDescription, delimiter, this::create);
|
||||||
groupedEntities.stream()
|
|
||||||
.filter(entity -> entity.getContainingNode() instanceof TableCell)
|
|
||||||
.collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow())).entrySet()
|
|
||||||
.stream()
|
|
||||||
.sorted(Comparator.comparingInt(Map.Entry::getKey))
|
|
||||||
.map(Map.Entry::getValue)
|
|
||||||
.forEach(entitiesInSameRow -> create(ruleIdentifier,
|
|
||||||
name,
|
|
||||||
entitiesInSameRow.stream()
|
|
||||||
.sorted(EntityComparators.first())
|
|
||||||
.map(Entity::getValue)
|
|
||||||
.collect(Collectors.joining(delimiter)),
|
|
||||||
valueDescription,
|
|
||||||
entitiesInSameRow));
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Optional<Table> getFirstTable(Entity entity) {
|
|
||||||
|
|
||||||
SemanticNode node = entity.getContainingNode();
|
|
||||||
while (!(node instanceof Table)) {
|
|
||||||
if (!node.hasParent()) {
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
node = node.getParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
return Optional.of((Table) node);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private Optional<TableCell> getFirstTableCell(Entity entity) {
|
|
||||||
|
|
||||||
SemanticNode node = entity.getContainingNode();
|
|
||||||
while (!(node instanceof TableCell)) {
|
|
||||||
if (!node.hasParent()) {
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
node = node.getParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
return Optional.of((TableCell) node);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new component with the given rule identifier, name, value, and value description.
|
* Creates a new component with the given rule identifier, name, value, and value description.
|
||||||
* If the component is part of a table, it also takes a list of entities that belong to the same table row.
|
* If the component is part of a table, it also takes a list of entities that belong to the same table row.
|
||||||
|
|||||||
@ -979,8 +979,8 @@ public class EntityCreationService {
|
|||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node);
|
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node);
|
||||||
if (node.getEntities().contains(entity)) {
|
if (node.getDocumentTree().getRoot().getNode().getEntities().contains(entity)) {
|
||||||
Optional<TextEntity> optionalTextEntity = node.getEntities()
|
Optional<TextEntity> optionalTextEntity = node.getDocumentTree().getRoot().getNode().getEntities()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(e -> e.equals(entity) && e.type().equals(type))
|
.filter(e -> e.equals(entity) && e.type().equals(type))
|
||||||
.peek(e -> e.addEngines(engines))
|
.peek(e -> e.addEngines(engines))
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
package com.iqser.red.service.redaction.v1.server.service.drools;
|
package com.iqser.red.service.redaction.v1.server.service.drools;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
@ -67,8 +68,19 @@ public class ComponentDroolsExecutionService {
|
|||||||
entityLog.getEntityLogEntry()
|
entityLog.getEntityLogEntry()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(this::isApplied)
|
.filter(this::isApplied)
|
||||||
.map(entry -> Entity.fromEntityLogEntry(entry, document))
|
.filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED))
|
||||||
|
.flatMap(entry -> {
|
||||||
|
List<Entity> entities = new ArrayList<>();
|
||||||
|
entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset()));
|
||||||
|
if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) {
|
||||||
|
entry.getDuplicatedTextRanges().forEach(duplicatedTextRange -> {
|
||||||
|
entities.add(Entity.fromEntityLogEntry(entry, document, duplicatedTextRange.getStart(), duplicatedTextRange.getEnd()));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return entities.stream();
|
||||||
|
})
|
||||||
.forEach(kieSession::insert);
|
.forEach(kieSession::insert);
|
||||||
|
|
||||||
fileAttributes.stream()
|
fileAttributes.stream()
|
||||||
.filter(f -> f.getValue() != null)
|
.filter(f -> f.getValue() != null)
|
||||||
.forEach(kieSession::insert);
|
.forEach(kieSession::insert);
|
||||||
|
|||||||
@ -0,0 +1,136 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.utils;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.service.document.EntityComparators;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.service.document.SemanticNodeComparators;
|
||||||
|
|
||||||
|
public class ComponentCreationUtils {
|
||||||
|
|
||||||
|
public static TableCell getFirstTableCell(Collection<Entity> uniqueEntities) {
|
||||||
|
|
||||||
|
return (TableCell) uniqueEntities.stream()
|
||||||
|
.findFirst()
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("No entities found")).getContainingNode();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static String joinTypes(Collection<Entity> entities) {
|
||||||
|
|
||||||
|
return entities.stream()
|
||||||
|
.map(Entity::getType)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.joining(", "));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static List<Entity> findEntitiesFromLongestSection(Collection<Entity> entities) {
|
||||||
|
|
||||||
|
var entitiesBySection = entities.stream()
|
||||||
|
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
||||||
|
Optional<SemanticNode> longestSection = entitiesBySection.entrySet()
|
||||||
|
.stream()
|
||||||
|
.sorted(Comparator.comparingInt(ComponentCreationUtils::getTotalLengthOfEntities).reversed())
|
||||||
|
.map(Map.Entry::getKey)
|
||||||
|
.findFirst();
|
||||||
|
|
||||||
|
if (longestSection.isEmpty()) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
return entitiesBySection.get(longestSection.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static List<Entity> findEntitiesFromFirstSection(Collection<Entity> entities) {
|
||||||
|
|
||||||
|
var entitiesBySection = entities.stream()
|
||||||
|
.collect(Collectors.groupingBy(entity -> entity.getContainingNode().getHighestParent()));
|
||||||
|
Optional<SemanticNode> firstSection = entitiesBySection.keySet()
|
||||||
|
.stream()
|
||||||
|
.min(SemanticNodeComparators.first());
|
||||||
|
if (firstSection.isEmpty()) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
return entitiesBySection.get(firstSection.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static Optional<TableCell> getFirstTableCell(Entity entity) {
|
||||||
|
|
||||||
|
SemanticNode node = entity.getContainingNode();
|
||||||
|
while (!(node instanceof TableCell)) {
|
||||||
|
if (!node.hasParent()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
node = node.getParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
return Optional.of((TableCell) node);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static Optional<Table> getFirstTable(Entity entity) {
|
||||||
|
|
||||||
|
SemanticNode node = entity.getContainingNode();
|
||||||
|
while (!(node instanceof Table)) {
|
||||||
|
if (!node.hasParent()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
node = node.getParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
return Optional.of((Table) node);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static void joinEntitiesOnSameRow(String ruleIdentifier, String name, List<Entity> groupedEntities, String valueDescription, String delimiter, QuintConsumer<String, String, String, String, Collection<Entity>> create) {
|
||||||
|
|
||||||
|
groupedEntities.stream()
|
||||||
|
.filter(entity -> entity.getContainingNode() instanceof TableCell || entity.getContainingNode() instanceof Table)
|
||||||
|
.collect(Collectors.groupingBy(entity -> {
|
||||||
|
if (entity.getContainingNode() instanceof TableCell) {
|
||||||
|
return ((TableCell) entity.getContainingNode()).getRow();
|
||||||
|
} else {
|
||||||
|
DocumentTree documentTree = entity.getContainingNode().getDocumentTree();
|
||||||
|
Optional<TableCell> tableCell = documentTree.findTableCellInTable(entity.getContainingNode().getTreeId(),
|
||||||
|
entity.getStartOffset(),
|
||||||
|
entity.getEndOffset());
|
||||||
|
return tableCell.map(TableCell::getRow)
|
||||||
|
.orElse(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
})).entrySet()
|
||||||
|
.stream()
|
||||||
|
.sorted(Comparator.comparingInt(Map.Entry::getKey))
|
||||||
|
.map(Map.Entry::getValue)
|
||||||
|
.forEach(entitiesInSameRow -> create.accept(ruleIdentifier,
|
||||||
|
name,
|
||||||
|
entitiesInSameRow.stream()
|
||||||
|
.sorted(EntityComparators.first())
|
||||||
|
.map(Entity::getValue)
|
||||||
|
.collect(Collectors.joining(delimiter)),
|
||||||
|
valueDescription,
|
||||||
|
entitiesInSameRow));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static int getTotalLengthOfEntities(Map.Entry<SemanticNode, List<Entity>> entry) {
|
||||||
|
|
||||||
|
return entry.getValue()
|
||||||
|
.stream()
|
||||||
|
.mapToInt(Entity::getLength).sum();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -0,0 +1,10 @@
|
|||||||
|
package com.iqser.red.service.redaction.v1.server.utils;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
@FunctionalInterface
|
||||||
|
public interface QuintConsumer<T, U, V, W, X extends Collection<?>> {
|
||||||
|
|
||||||
|
void accept(T t, U u, V v, W w, X x);
|
||||||
|
|
||||||
|
}
|
||||||
@ -11,6 +11,7 @@ import java.time.OffsetDateTime;
|
|||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Disabled;
|
import org.junit.jupiter.api.Disabled;
|
||||||
@ -32,6 +33,7 @@ import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
|||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.componentlog.ComponentLogEntryValue;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
|
||||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||||
@ -286,4 +288,25 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
|
|||||||
.get(0).getValue());
|
.get(0).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDoseMortalityExtraction() {
|
||||||
|
|
||||||
|
AnalyzeRequest request = uploadFileToStorage("files/syngenta/CustomerFiles/Documine/Flora/VV-547525_Toxicidade_Oral_Aguda.pdf");
|
||||||
|
|
||||||
|
System.out.println("Start Full integration test");
|
||||||
|
analyzeDocumentStructure(LayoutParsingType.DOCUMINE, request);
|
||||||
|
System.out.println("Finished structure analysis");
|
||||||
|
analyzeService.analyze(request);
|
||||||
|
System.out.println("Finished analysis");
|
||||||
|
|
||||||
|
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||||
|
var doseMortality = componentLog.getComponentLogEntries().stream().filter(componentLogEntry -> componentLogEntry.getName().equals("Dose_Mortality")).findFirst().get();
|
||||||
|
|
||||||
|
assertEquals(doseMortality.getComponentValues().size(), 5);
|
||||||
|
|
||||||
|
Pattern pattern = Pattern.compile("^5000, [SD]$");
|
||||||
|
boolean allMatch = doseMortality.getComponentValues().stream().map(ComponentLogEntryValue::getValue).allMatch(pattern.asPredicate());
|
||||||
|
assertTrue(allMatch);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
Subproject commit 21fefb64bf27ca2b3329a6c69d90a27450b17930
|
Subproject commit 5705cc0782605fdca5dfff134b436f7143c9e421
|
||||||
Loading…
x
Reference in New Issue
Block a user