from collections import namedtuple
from functools import partial
from itertools import starmap, compress

# Axis-aligned rectangle described by its minimum corner (xmin, ymin) and its
# maximum corner (xmax, ymax).
Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax")


def remove_overlapping(rectangles):
    """Drop every rectangle that overlaps another rectangle of the input.

    Args:
        rectangles: iterable of ``(x, y, w, h)`` tuples.

    Returns:
        A lazy iterator of the ``(x, y, w, h)`` tuples whose intersection area
        with every other (non-equal) input rectangle is zero. Rectangles that
        merely share an edge do not count as overlapping, and equal-valued
        duplicates are never compared with each other.
    """

    def overlap(a, b):
        return compute_intersection(a, b) > 0

    def does_not_overlap(rect, rectangles):
        return not any(overlap(rect, other) for other in rectangles if rect != other)

    rects = list(map(xywh_to_vec_rect, rectangles))
    kept = filter(partial(does_not_overlap, rectangles=rects), rects)
    return map(vec_rect_to_xywh, kept)


def remove_included(rectangles):
    """Drop every rectangle that lies completely inside another input rectangle.

    Args:
        rectangles: iterable of ``(x, y, w, h)`` tuples.

    Returns:
        A lazy iterator of the ``(x, y, w, h)`` tuples that are not contained
        in any other (non-equal) input rectangle. Containment is tested
        exactly, without tolerance; shared borders count as contained.
    """

    def included(outer, inner):
        """Does ``outer`` fully contain ``inner``?"""
        return (
            inner.xmin >= outer.xmin
            and inner.ymin >= outer.ymin
            and inner.xmax <= outer.xmax
            and inner.ymax <= outer.ymax
        )

    def is_not_included(rect, rectangles):
        return not any(included(other, rect) for other in rectangles if rect != other)

    rects = list(map(xywh_to_vec_rect, rectangles))
    kept = filter(partial(is_not_included, rectangles=rects), rects)
    return map(vec_rect_to_xywh, kept)


# A tolerance of 1 turned out to be too low: most lines are 2px wide.
def adjacent1d(n, m, tolerance=4):
    """Return True when the scalars ``n`` and ``m`` differ by at most ``tolerance``."""
    return abs(n - m) <= tolerance


def adjacent(a, b):
    """Return True when rectangle ``b`` is attached to one side of rectangle ``a``.

    Two rects (v1, v2), (w1, w2) are adjacent if either of:
    - the x components of v2 and w1 match and the y components of w1 or w2 are in the range of the y components of v1
        and v2
    - the x components of v1 and w2 match and the y components of w1 or w2 are in the range of the y components of v1
        and v2
    - the y components of v2 and w1 match and the x components of w1 or w2 are in the range of the x components of v1
        and v2
    - the y components of v1 and w2 match and the x components of w1 or w2 are in the range of the x components of v1
        and v2

    "Match" means equal up to the tolerance of ``adjacent1d``. The relation is
    NOT symmetric: only the edge coordinates of ``b`` are tested against the
    range of ``a``, so a ``b`` that extends beyond ``a`` on both ends is not
    reported. Returns False when either argument is None.
    """

    def adjacent2d(edge_a, edge_b, b_low, b_high, a_low, a_high):
        # The facing edges must coincide (within tolerance) and at least one
        # end of b's extent must fall inside a's extent on the other axis.
        return adjacent1d(edge_a, edge_b) and any(a_low <= p <= a_high for p in [b_low, b_high])

    if any(x is None for x in (a, b)):
        return False

    v1 = a.xmin, a.ymin
    v2 = a.xmax, a.ymax
    w1 = b.xmin, b.ymin
    w2 = b.xmax, b.ymax

    return any(
        (
            adjacent2d(v2[0], w1[0], w1[1], w2[1], v1[1], v2[1]),
            adjacent2d(v1[0], w2[0], w1[1], w2[1], v1[1], v2[1]),
            adjacent2d(v2[1], w1[1], w1[0], w2[0], v1[0], v2[0]),
            adjacent2d(v1[1], w2[1], w1[0], w2[0], v1[0], v2[0]),
        )
    )


def _touching(a, b):
    """Symmetric adjacency: True when ``adjacent`` holds in either direction."""
    return adjacent(a, b) or adjacent(b, a)


def __remove_isolated_unsorted(rectangles):
    """Keep only rectangles that touch at least one other input rectangle.

    Adjacency is tested in both directions. With the one-directional
    ``adjacent`` test alone, a rectangle larger than its neighbour was dropped
    while the neighbour was kept, so the neighbour ended up isolated in the
    output.
    """

    def is_connected(rect, rectangles):
        return any(_touching(other, rect) for other in rectangles if rect != other)

    rects = list(map(xywh_to_vec_rect, rectangles))
    kept = filter(partial(is_connected, rectangles=rects), rects)
    return map(vec_rect_to_xywh, kept)


def make_box(x1, y1, x2, y2):
    """Return the four coordinates as a dict with keys ``x1``, ``y1``, ``x2``, ``y2``."""
    keys = "x1", "y1", "x2", "y2"
    return dict(zip(keys, [x1, y1, x2, y2]))


def __remove_isolated_sorted(rectangles):
    """Keep only rectangles that touch their predecessor or successor in the list.

    Each rectangle is compared only with its direct list neighbours; the first
    and last element have a single neighbour. Adjacency is tested in both
    directions so the result does not depend on which of two touching
    rectangles comes first.
    """

    def is_connected(left, center, right):
        return _touching(left, center) or _touching(center, right)

    rects = list(map(xywh_to_vec_rect, rectangles))
    # None marks the missing neighbour at either end; adjacent() treats it as
    # "not adjacent".
    lefts = [None, *rects[:-1]]
    rights = [*rects[1:], None]

    mask = starmap(is_connected, zip(lefts, rects, rights))
    kept = compress(rects, mask)
    return map(vec_rect_to_xywh, kept)


def remove_isolated(rectangles, input_sorted=False):
    """Remove rectangles that are not adjacent to any other rectangle.

    Args:
        rectangles: iterable of ``(x, y, w, h)`` tuples.
        input_sorted: when True, every rectangle is compared only with its
            direct neighbours in the input order; otherwise with all others.

    Returns:
        A lazy iterator of the remaining ``(x, y, w, h)`` tuples.
    """
    if input_sorted:
        return __remove_isolated_sorted(rectangles)
    return __remove_isolated_unsorted(rectangles)


def compute_intersection(a, b):
    """Return the area of the intersection of two ``Rectangle``s (0 if disjoint)."""
    dx = min(a.xmax, b.xmax) - max(a.xmin, b.xmin)
    dy = min(a.ymax, b.ymax) - max(a.ymin, b.ymin)
    return dx * dy if (dx >= 0) and (dy >= 0) else 0


def has_no_parent(hierarchy):
    """Return True when the last entry of ``hierarchy`` is less than or equal to 0.

    NOTE(review): presumably ``hierarchy`` is a contour-hierarchy row whose
    last entry is the parent index; note that index 0 is also treated as
    "no parent" - confirm against the caller.
    """
    return hierarchy[-1] <= 0


def xywh_to_vec_rect(rect):
    """Convert an ``(x, y, w, h)`` tuple into a ``Rectangle``."""
    v1, v2 = xywh_to_vecs(rect)
    return Rectangle(*v1, *v2)


def vecs_to_vec_rect(rect):
    """Convert a pair of corner points ``((x1, y1), (x2, y2))`` into a ``Rectangle``."""
    v1, v2 = rect
    return Rectangle(*v1, *v2)


def xywh_to_vecs(rect):
    """Convert an ``(x, y, w, h)`` tuple into its two corner points ``(x1, y1), (x2, y2)``."""
    x1, y1, w, h = rect
    x2 = x1 + w
    y2 = y1 + h
    return (x1, y1), (x2, y2)


def vec_rect_to_xywh(rect):
    """Convert a ``Rectangle`` (or any 4-sequence of corner coordinates) into ``(x, y, w, h)``."""
    x, y, x2, y2 = rect
    w = x2 - x
    h = y2 - y
    return x, y, w, h