DM-307: documine fixes
This commit is contained in:
parent
be6fe0b0ca
commit
fc233cb56d
@ -0,0 +1,70 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.BinaryOperator;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collector;
|
||||
|
||||
import com.google.common.base.Functions;
|
||||
|
||||
public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Boundary>, List<Boundary>> {
|
||||
|
||||
@Override
|
||||
public Supplier<List<Boundary>> supplier() {
|
||||
|
||||
return LinkedList::new;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BiConsumer<List<Boundary>, Boundary> accumulator() {
|
||||
|
||||
return (existingList, boundary) -> {
|
||||
if (existingList.isEmpty()) {
|
||||
existingList.add(boundary);
|
||||
return;
|
||||
}
|
||||
|
||||
Boundary prevBoundary = existingList.get(existingList.size() - 1);
|
||||
if (prevBoundary.end() > boundary.start()) {
|
||||
throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", prevBoundary, boundary));
|
||||
}
|
||||
|
||||
if (prevBoundary.end() == boundary.start()) {
|
||||
existingList.remove(existingList.size() - 1);
|
||||
existingList.add(Boundary.merge(List.of(prevBoundary, boundary)));
|
||||
} else {
|
||||
existingList.add(boundary);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public BinaryOperator<List<Boundary>> combiner() {
|
||||
|
||||
return (list1, list2) -> {
|
||||
list1.addAll(list2);
|
||||
return list1;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Function<List<Boundary>, List<Boundary>> finisher() {
|
||||
|
||||
return Functions.identity();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Set<Characteristics> characteristics() {
|
||||
|
||||
return Set.of(Characteristics.IDENTITY_FINISH);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,5 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex;
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
|
||||
@ -22,6 +23,7 @@ import org.kie.api.runtime.KieSession;
|
||||
import com.google.common.base.Functions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.ConsecutiveBoundaryCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
@ -420,6 +422,18 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public Stream<RedactionEntity> bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
|
||||
.map(SemanticNode::getBoundary)
|
||||
.collect(new ConsecutiveBoundaryCollector())
|
||||
.stream()
|
||||
.map(boundary -> byBoundary(boundary, type, entityType, node))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
|
||||
public Optional<RedactionEntity> bySemanticNode(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
Boundary boundary = node.getTextBlock().getBoundary();
|
||||
|
||||
@ -0,0 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
// Placeholder test class for ConsecutiveBoundaryCollector: it declares no test methods yet.
// TODO: add cases for adjacent, non-adjacent and out-of-order boundaries.
class ConsecutiveBoundaryCollectorTest {
|
||||
|
||||
}
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user