# Patch summary (commentary only; text before the first "diff --git" header is
# not part of any hunk).
#
# Files touched:
#   cv_analysis/figure_detection/figures.py
#   cv_analysis/layout_parsing.py
#   cv_analysis/utils/common.py
#   cv_analysis/utils/utils.py
#
# What the hunks change:
#   * common.py: `find_contours` is renamed to `find_contours_and_hierarchies`;
#     its body is unchanged. It returns `(contours, first(hierarchies))`, or
#     `(contours, None)` when cv2.findContours yields `hierarchies is None`.
#   * figures.py / layout_parsing.py: the import and the call sites are updated
#     to the new name.
#   * layout_parsing.py: docstrings are added to `parse_layout` and
#     `find_segments`.
#   * layout_parsing.py, `__find_segments`:
#     `lkeep(map(to_rectangle_if_valid, *find_contours(image)))` becomes
#     `lstarkeep(to_rectangle_if_valid, zip(*find_contours_and_hierarchies(image)))`.
#   * layout_parsing.py, `prepare_for_meta_detection`: the four sequential
#     reassignments of `image` are replaced by one `rcompose(fill_rectangles,
#     threshold_image, invert_image, normalize_to_gray_scale)` pipeline that is
#     called with `(image, rectangles)`.
#   * utils.py: `keep` is added to the funcy import. The inner `starred` of
#     `star` now takes one `args` parameter and calls `fn(*args)`; previously
#     it took `*args` and forwarded them unchanged. New helpers: `starkeep`
#     (generator, `yield from keep(star(fn), coll)`) and `lstarkeep`
#     (`list(starkeep(fn, coll))`).
#
# NOTE(review): when `find_contours_and_hierarchies` returns `None` as its
#   second element, `zip(*...)` in `__find_segments` becomes
#   `zip(contours, None)`, which raises TypeError. The previous `map(...)`
#   form had the same exposure. Confirm whether that input can reach this code.
# NOTE(review): `lstarkeep` is defined in cv_analysis/utils/utils.py but is
#   imported from `cv_analysis.utils`; presumably the package re-exports it.
#   TODO confirm.
# NOTE(review): the calling convention of `star(fn)` changes (one iterable
#   argument instead of varargs). Callers outside this patch are not visible
#   here; verify them.
# NOTE(review): the line structure of this patch appears to have been collapsed
#   (many patch lines share one physical line). Restore it from the original
#   commit before trying to apply it.
diff --git a/cv_analysis/figure_detection/figures.py b/cv_analysis/figure_detection/figures.py index b5468dd..42c6963 100644 --- a/cv_analysis/figure_detection/figures.py +++ b/cv_analysis/figure_detection/figures.py @@ -1,7 +1,7 @@ import cv2 import numpy as np -from cv_analysis.utils.common import find_contours +from cv_analysis.utils.common import find_contours_and_hierarchies def detect_large_coherent_structures(image: np.array): @@ -28,6 +28,6 @@ def detect_large_coherent_structures(image: np.array): # FIXME: Parameterize via factory close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1) # TODO: Tweak iterations - contours, _ = find_contours(close) + contours, _ = find_contours_and_hierarchies(close) return contours diff --git a/cv_analysis/layout_parsing.py b/cv_analysis/layout_parsing.py index 2c92d84..5797b2b 100644 --- a/cv_analysis/layout_parsing.py +++ b/cv_analysis/layout_parsing.py @@ -5,8 +5,9 @@ import cv2 import numpy as np from funcy import compose, rcompose, lkeep +from cv_analysis.utils import lstarkeep from cv_analysis.utils.common import ( - find_contours, + find_contours_and_hierarchies, dilate_page_components, normalize_to_gray_scale, threshold_image, @@ -20,6 +21,14 @@ from cv_analysis.utils.rectangle import Rectangle def parse_layout(image: np.array) -> List[Rectangle]: + """Parse the layout of a page. + + Args: + image: Image of the page. + + Returns: + List of rectangles representing the layout of the page as identified page elements. + """ rectangles = rcompose( find_segments, remove_included, @@ -31,6 +40,7 @@ def parse_layout(image: np.array) -> List[Rectangle]: def find_segments(image: np.ndarray) -> List[Rectangle]: + """Find segments in a page. 
Segments are structural elements of a page, such as text blocks, tables, etc.""" rectangles = rcompose( prepare_for_initial_detection, __find_segments, @@ -49,17 +59,18 @@ def __find_segments(image: np.ndarray) -> List[Rectangle]: def to_rectangle_if_valid(contour, hierarchy): return contour_to_rectangle(contour) if is_likely_segment(contour) and has_no_parent(hierarchy) else None - rectangles = lkeep(map(to_rectangle_if_valid, *find_contours(image))) + rectangles = lstarkeep(to_rectangle_if_valid, zip(*find_contours_and_hierarchies(image))) return rectangles def prepare_for_meta_detection(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray: - - image = fill_rectangles(image, rectangles) - image = threshold_image(image) - image = invert_image(image) - image = normalize_to_gray_scale(image) + image = rcompose( + fill_rectangles, + threshold_image, + invert_image, + normalize_to_gray_scale, + )(image, rectangles) return image diff --git a/cv_analysis/utils/common.py b/cv_analysis/utils/common.py index a77e27e..597a1f0 100644 --- a/cv_analysis/utils/common.py +++ b/cv_analysis/utils/common.py @@ -8,7 +8,7 @@ from funcy import first from cv_analysis.utils.rectangle import Rectangle -def find_contours(image): +def find_contours_and_hierarchies(image): contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) return contours, first(hierarchies) if hierarchies is not None else None diff --git a/cv_analysis/utils/utils.py b/cv_analysis/utils/utils.py index 69f80e4..d82aa2e 100644 --- a/cv_analysis/utils/utils.py +++ b/cv_analysis/utils/utils.py @@ -1,7 +1,7 @@ from __future__ import annotations import cv2 -from funcy import first, iterate +from funcy import first, iterate, keep from numpy import generic @@ -30,12 +30,20 @@ def lift(fn): def star(fn): - def starred(*args): + def starred(args): return fn(*args) return starred +def lstarkeep(fn, coll): + return list(starkeep(fn, coll)) + + +def starkeep(fn, coll): + yield from 
keep(star(fn), coll) + + def until(cond, func, *args, **kwargs): return first(filter(cond, iterate(func, *args, **kwargs)))