diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/ConsecutiveBoundaryCollector.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/ConsecutiveBoundaryCollector.java new file mode 100644 index 00000000..3a35bbc7 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/ConsecutiveBoundaryCollector.java @@ -0,0 +1,70 @@ +package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph; + +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.function.BinaryOperator; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collector; + +import com.google.common.base.Functions; + +public class ConsecutiveBoundaryCollector implements Collector, List> { + + @Override + public Supplier> supplier() { + + return LinkedList::new; + } + + + @Override + public BiConsumer, Boundary> accumulator() { + + return (existingList, boundary) -> { + if (existingList.isEmpty()) { + existingList.add(boundary); + return; + } + + Boundary prevBoundary = existingList.get(existingList.size() - 1); + if (prevBoundary.end() > boundary.start()) { + throw new IllegalArgumentException(String.format("Can't concatenate %s and %s. Boundaries must be ordered!", prevBoundary, boundary)); + } + + if (prevBoundary.end() == boundary.start()) { + existingList.remove(existingList.size() - 1); + existingList.add(Boundary.merge(List.of(prevBoundary, boundary))); + } else { + existingList.add(boundary); + } + }; + } + + + @Override + public BinaryOperator> combiner() { + + return (list1, list2) -> { + list1.addAll(list2); + return list1; + }; + } + + + @Override + public Function, List> finisher() { + + return Functions.identity(); + } + + + @Override + public Set characteristics() { + + return Set.of(Characteristics.IDENTITY_FINISH); + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java index 34e84e02..a2313503 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/services/EntityCreationService.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services; +import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch; import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex; import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex; import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators; @@ -22,6 +23,7 @@ import org.kie.api.runtime.KieSession; import com.google.common.base.Functions; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.ConsecutiveBoundaryCollector; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; @@ -420,6 +422,18 @@ public class EntityCreationService { } + public Stream bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) { + + return node.streamAllSubNodesOfType(NodeType.PARAGRAPH) + .map(SemanticNode::getBoundary) + .collect(new ConsecutiveBoundaryCollector()) + .stream() + .map(boundary -> byBoundary(boundary, type, entityType, node)) + .filter(Optional::isPresent) + .map(Optional::get); + } + + public Optional bySemanticNode(SemanticNode node, String type, EntityType entityType) { Boundary boundary = node.getTextBlock().getBoundary(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/ConsecutiveBoundaryCollectorTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/ConsecutiveBoundaryCollectorTest.java new file mode 100644 index 00000000..222c6648 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/layoutparsing/document/graph/ConsecutiveBoundaryCollectorTest.java @@ -0,0 +1,7 @@ +package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph; + +import static org.junit.jupiter.api.Assertions.*; + +class ConsecutiveBoundaryCollectorTest { + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/BDR/Plenarprotokoll 1 (keine Druchsache!) (1).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/BDR/Plenarprotokoll 1 (keine Druchsache!) (1).pdf new file mode 100644 index 00000000..564320d7 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/BDR/Plenarprotokoll 1 (keine Druchsache!) (1).pdf differ