diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java index e6c28550..c19fd30d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/DocumentTree.java @@ -288,8 +288,8 @@ public class DocumentTree { if (treeId.isEmpty()) { return root; } - Entry entry = root.children.get(treeId.get(0)); - for (int id : treeId.subList(1, treeId.size())) { + Entry entry = root; + for (int id : treeId) { entry = entry.children.get(id); } return entry; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index 51bdf71d..fdc28329 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -614,21 +614,21 @@ public class EntityCreationService { /** * Looks across the remaining table row to the right of the provided TableCell if any line intersects the y coordinates of the found text. 
* - * @param TextRanges a list of textRanges + * @param textRanges a list of textRanges * @param tableCell the table cell * @param type the type * @param entityType the entity type * @param tableNode the table node * @return a stream of RedactionEntities */ - private Stream lineAfterBoundariesAcrossColumns(List TextRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) { + private Stream lineAfterBoundariesAcrossColumns(List textRanges, TableCell tableCell, String type, EntityType entityType, Table tableNode) { - return TextRanges.stream() + return textRanges.stream() .map(boundary -> RectangleTransformations.rectangle2DBBox(tableCell.getTextBlock().getPositions(boundary))) .map(bBox -> Pair.of(bBox.getMaxY(), bBox.getMinY())) .map(maxMinPair -> tableNode.streamRow(tableCell.getRow()) .filter(nextTableCell -> nextTableCell.getCol() > tableCell.getCol()) - .map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesInYRange(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock())) + .map(nextTableCell -> RedactionSearchUtility.findTextRangesOfAllLinesWithCloseYCoordinates(maxMinPair.getLeft(), maxMinPair.getRight(), nextTableCell.getTextBlock())) .map(b -> b.trim(tableNode.getTextBlock())) .filter(boundary -> isValidEntityTextRange(tableNode.getTextBlock(), boundary)) .map(boundary -> byTextRange(boundary, type, entityType, tableNode)) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java index 094e7e42..c3a5323e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java @@ -3,8 +3,10 @@ package com.iqser.red.service.redaction.v1.server.utils; import static java.lang.String.format; import java.awt.geom.Rectangle2D; +import java.awt.geom.RectangularShape; import java.util.LinkedList; import java.util.List; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.IntStream; @@ -154,14 +156,8 @@ public class RedactionSearchUtility { */ public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) { - List lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed() - .map(textBlock::getLineTextRange) - .filter(lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary)) - .toList(); - if (lineBoundaries.isEmpty()) { - return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start()); - } - return TextRange.merge(lineBoundaries); + Predicate isWithinYRange = lineBoundary -> isWithinYRange(maxY, minY, textBlock, lineBoundary); + return filterLineBoundaries(textBlock, isWithinYRange); } @@ -172,6 +168,49 @@ public class RedactionSearchUtility { } + /** + * Identifies all lines within a text block whose bounding box has a minimum and a maximum Y-coordinate that each differ from the given ones by at most the average height of the text block's position rectangles. + * + * @param maxY The reference maximum Y-coordinate; it is unboxed for the comparison, so a null value fails as soon as a line is checked. + * @param minY The reference minimum Y-coordinate; it is unboxed for the comparison, so a null value fails as soon as a line is checked. + * @param textBlock The text block containing the lines to be checked. + * @return A {@link TextRange} merging all matching lines; if no line matches, a range that starts and ends at the start of the text block's range. 
+ */ + public static TextRange findTextRangesOfAllLinesWithCloseYCoordinates(Double maxY, Double minY, TextBlock textBlock) { + + double averageLineHeight = IntStream.range(0, textBlock.numberOfLines()).boxed() + .map(textBlock::getLineTextRange) + .flatMap((TextRange stringTextRange) -> textBlock.getPositions(stringTextRange) + .stream()) + .map(RectangularShape::getHeight) + .mapToDouble(Double::doubleValue).average() + .orElse(0); /* no position rectangles at all: the tolerance falls back to 0 */ + Predicate hasCloseYRange = lineBoundary -> areYCoordinatesClose(maxY, minY, textBlock, lineBoundary, averageLineHeight); + + return filterLineBoundaries(textBlock, hasCloseYRange); + } + + + private static boolean areYCoordinatesClose(Double maxY, Double minY, TextBlock textBlock, TextRange lineTextRange, double averageLineHeight) { + + Rectangle2D lineBBox = RectangleTransformations.rectangle2DBBox(textBlock.getPositions(lineTextRange)); + return Math.abs(lineBBox.getMinY() - minY) <= averageLineHeight && Math.abs(maxY - lineBBox.getMaxY()) <= averageLineHeight; /* both the minimum and the maximum Y must be within the tolerance */ + } + + + private static TextRange filterLineBoundaries(TextBlock textBlock, Predicate textRangePredicate) { + + List lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed() + .map(textBlock::getLineTextRange) + .filter(textRangePredicate) + .toList(); + if (lineBoundaries.isEmpty()) { /* no line was accepted by the predicate */ + return new TextRange(textBlock.getTextRange().start(), textBlock.getTextRange().start()); + } + return TextRange.merge(lineBoundaries); + } + + + /** * Finds TextRanges matching a regex pattern within a TextBlock. *