diff --git a/cv_analysis/utils/postprocessing.py b/cv_analysis/utils/postprocessing.py index 4069fd4..0586908 100644 --- a/cv_analysis/utils/postprocessing.py +++ b/cv_analysis/utils/postprocessing.py @@ -2,6 +2,8 @@ from functools import partial from itertools import starmap, compress from typing import Iterable, List, Sequence +from funcy import lremove + from cv_analysis.utils.rectangle import Rectangle @@ -21,6 +23,16 @@ def remove_included(rectangles: Iterable[Rectangle]) -> List[Rectangle]: return rectangles_to_keep +def remove_small(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]: + min_width = page_width * min_percentage + min_height = page_height * min_percentage + + def small(box: Rectangle): + return box.width < min_width or box.height < min_height + + return lremove(small, boxes) + + def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]: def is_connected(rect: Rectangle, rectangles: Iterable[Rectangle]): return any(rect.adjacent(rect2) for rect2 in rectangles if not rect == rect2) diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index bfb0b22..55e253f 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -60,7 +60,6 @@ from funcy import ( omit, project, complement, - lremove, chunks, ) @@ -1297,16 +1296,6 @@ def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) - ) -def drop_small_boxes(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]: - min_width = page_width * min_percentage - min_height = page_height * min_percentage - - def small(box: Rectangle): - return box.width < min_width or box.height < min_height - - return lremove(small, boxes) - - @pytest.fixture( params=[ TwoColumnPagePartitioner,