# Reconstructed from a unified diff whose line breaks were lost.
#
# vidocp/table_parsing.py (index 5b811b8..c23aa5b), hunk @@ -57,7 +57,7 @@
# inside parse_table(image: np.array):
#       rects = stats[:, :-1][2:]
#       # FIXME: For some reason some isolated rects remain.
#   -   rects = remove_isolated(rects)
#   +   rects = remove_isolated(rects, input_sorted=True)
#       return rects
#
# vidocp/utils/post_processing.py (index c05ab03..13f3149),
# hunks @@ -1,6 +1,6 @@ and @@ -29,24 +29,73 @@; post-patch content follows.
from collections import namedtuple
from functools import partial
from itertools import starmap, compress

# NOTE: remove_overlapping(rectangles) and remove_included(rectangles) are
# defined at this point in the full file; their bodies lie outside the hunks
# shown here and are therefore not reproduced.


def adjacent1d(n, m, tolerance=1):
    """Return True if the scalars *n* and *m* differ by at most *tolerance*."""
    return abs(n - m) <= tolerance


def adjacent(a, b):
    """Return True if the rectangles *a* and *b* touch along an edge.

    Both arguments expose ``xmin``, ``ymin``, ``xmax`` and ``ymax``; either
    may be ``None``, in which case the result is False.

    With v1/v2 the min/max corners of *a* and w1/w2 those of *b*, the two
    rectangles are adjacent if any of the following holds:
    - the x components of v2 and w1 match and the y spans of a and b overlap
    - the x components of v1 and w2 match and the y spans of a and b overlap
    - the y components of v2 and w1 match and the x spans of a and b overlap
    - the y components of v1 and w2 match and the x spans of a and b overlap

    "Match" means equal within the default tolerance of ``adjacent1d``.
    Spans are compared as closed intervals, so the relation is symmetric:
    a small rectangle lying along the side of a larger one is adjacent to it
    regardless of argument order. Assumes ``xmin <= xmax`` and
    ``ymin <= ymax`` for both rectangles.
    """

    def touching(edge_a, edge_b, b_low, b_high, a_low, a_high):
        # The facing edges coincide and the spans along the other axis
        # intersect. Testing interval overlap (rather than "an endpoint of b
        # lies inside a") also covers the case where b spans a completely.
        return adjacent1d(edge_a, edge_b) and b_low <= a_high and a_low <= b_high

    if a is None or b is None:
        return False

    return (
        touching(a.xmax, b.xmin, b.ymin, b.ymax, a.ymin, a.ymax)
        or touching(a.xmin, b.xmax, b.ymin, b.ymax, a.ymin, a.ymax)
        or touching(a.ymax, b.ymin, b.xmin, b.xmax, a.xmin, a.xmax)
        or touching(a.ymin, b.ymax, b.xmin, b.xmax, a.xmin, a.xmax)
    )


# FIXME: For
# some reason some isolated rects remain.
def __remove_isolated_unsorted(rectangles):
    """Lazily drop every rectangle that has no adjacent partner in the input.

    Each input item is converted with ``xywh_to_vec_rect`` and compared
    against all other converted rectangles (those equal to it are skipped).
    Survivors are converted back with ``vec_rect_to_xywh``. The result is a
    one-shot iterator, not a list.
    """
    vec_rects = [xywh_to_vec_rect(item) for item in rectangles]

    def has_neighbour(rect):
        return any(adjacent(other, rect) for other in vec_rects if not rect == other)

    return map(vec_rect_to_xywh, filter(has_neighbour, vec_rects))


def __remove_isolated_sorted(rectangles):
    """Lazily drop rectangles not adjacent to their predecessor or successor.

    Only the immediate neighbours in input order are inspected, so this relies
    on the caller supplying the rectangles in an order where spatial
    neighbours are also sequence neighbours (NOTE(review): the exact ordering
    expected is not visible here; confirm against the caller). The first and
    last items have ``None`` on their open side. The result is a one-shot
    iterator, not a list.
    """
    vec_rects = [xywh_to_vec_rect(item) for item in rectangles]

    # Shift the sequence by one in each direction, padding the open ends.
    predecessors = [None] + vec_rects[:-1]
    successors = vec_rects[1:] + [None]

    keep = (
        adjacent(before, current) or adjacent(current, after)
        for before, current, after in zip(predecessors, vec_rects, successors)
    )

    return map(vec_rect_to_xywh, compress(vec_rects, keep))


def remove_isolated(rectangles, input_sorted=False):
    """Remove rectangles that have no adjacent neighbour.

    With ``input_sorted=True`` only sequence neighbours are checked (linear in
    the number of rectangles); otherwise every pair is compared.
    """
    if input_sorted:
        return __remove_isolated_sorted(rectangles)
    return __remove_isolated_unsorted(rectangles)


Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax")