DM-307: documine fixes

This commit is contained in:
Kilian Schüttler 2023-07-14 14:52:09 +02:00
parent be6fe0b0ca
commit fc233cb56d
4 changed files with 91 additions and 0 deletions

View File

@ -0,0 +1,70 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import com.google.common.base.Functions;
/**
 * Collector that gathers {@link Boundary} elements into a list and fuses directly adjacent ones.
 *
 * <p>Two boundaries count as adjacent when the end of the earlier one equals the start of the later one;
 * such a pair is replaced by the result of {@code Boundary.merge(List.of(earlier, later))}
 * (presumably a single boundary spanning both - confirm against {@code Boundary.merge}).
 * Boundaries separated by a gap are kept as separate list entries.
 *
 * <p>The input must be ordered: a boundary that starts before the previously collected one ends is rejected
 * with an {@link IllegalArgumentException}. The same rule is applied when partial results are combined, so the
 * collector yields the same list whether or not the stream evaluation was split.
 */
public class ConsecutiveBoundaryCollector implements Collector<Boundary, List<Boundary>, List<Boundary>> {

    /**
     * @return a factory for the mutable result list
     */
    @Override
    public Supplier<List<Boundary>> supplier() {

        return LinkedList::new;
    }


    /**
     * @return a function that appends one boundary to the result list, merging it into the last entry if adjacent
     * @throws IllegalArgumentException (from the returned function) if the boundary starts before the last entry ends
     */
    @Override
    public BiConsumer<List<Boundary>, Boundary> accumulator() {

        return ConsecutiveBoundaryCollector::append;
    }


    /**
     * @return a function that folds the right partial result into the left one and returns the left list
     * @throws IllegalArgumentException (from the returned function) if the right list continues before the left list ends
     */
    @Override
    public BinaryOperator<List<Boundary>> combiner() {

        return (left, right) -> {
            // Route every element through append so that the seam between both partial results is
            // merged and validated exactly like two neighbouring elements in a sequential run.
            for (Boundary boundary : right) {
                append(left, boundary);
            }
            return left;
        };
    }


    /**
     * @return the identity function; the accumulation list already is the final result
     */
    @Override
    public Function<List<Boundary>, List<Boundary>> finisher() {

        return Function.identity();
    }


    /**
     * @return a set containing only {@link Collector.Characteristics#IDENTITY_FINISH}
     */
    @Override
    public Set<Characteristics> characteristics() {

        return Set.of(Characteristics.IDENTITY_FINISH);
    }


    /**
     * Appends {@code boundary} to {@code collected}, merging it into the last entry when both are adjacent.
     */
    private static void append(List<Boundary> collected, Boundary boundary) {

        if (collected.isEmpty()) {
            collected.add(boundary);
            return;
        }

        int lastIndex = collected.size() - 1;
        Boundary previous = collected.get(lastIndex);

        if (previous.end() > boundary.start()) {
            throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", previous, boundary));
        }

        if (previous.end() == boundary.start()) {
            // Adjacent: replace the last entry with the merged boundary instead of adding a new one.
            collected.set(lastIndex, Boundary.merge(List.of(previous, boundary)));
        } else {
            collected.add(boundary);
        }
    }

}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex;
import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
@ -22,6 +23,7 @@ import org.kie.api.runtime.KieSession;
import com.google.common.base.Functions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.ConsecutiveBoundaryCollector;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
@ -420,6 +422,18 @@ public class EntityCreationService {
}
/**
 * Creates entities from the paragraph sub-nodes of {@code node}, treating directly adjacent paragraphs as one.
 *
 * <p>The boundaries of all sub-nodes of type {@code PARAGRAPH} are collected with a
 * {@link ConsecutiveBoundaryCollector}; for each resulting boundary {@code byBoundary} is invoked with the given
 * type, entity type and {@code node}. Boundaries for which {@code byBoundary} returns an empty Optional are dropped.
 *
 * @param node       node whose paragraph sub-nodes are inspected
 * @param type       type passed through to {@code byBoundary}
 * @param entityType entity type passed through to {@code byBoundary}
 * @return stream of the entities that could be created
 */
public Stream<RedactionEntity> bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) {

    return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
            .map(SemanticNode::getBoundary)
            .collect(new ConsecutiveBoundaryCollector())
            .stream()
            .map(mergedBoundary -> byBoundary(mergedBoundary, type, entityType, node))
            // Keeps only the present values, unwrapping them in the same step.
            .flatMap(Optional::stream);
}
public Optional<RedactionEntity> bySemanticNode(SemanticNode node, String type, EntityType entityType) {
Boundary boundary = node.getTextBlock().getBoundary();

View File

@ -0,0 +1,7 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Test class for {@link ConsecutiveBoundaryCollector}. It currently declares no test cases.
 */
class ConsecutiveBoundaryCollectorTest {
// TODO: add cases for adjacent boundaries (merged), boundaries with a gap (kept apart)
// and out-of-order boundaries (IllegalArgumentException).
}