diff --git a/cv_analysis/figure_detection/text.py b/cv_analysis/figure_detection/text.py
index a179e27..5f5a60d 100644
--- a/cv_analysis/figure_detection/text.py
+++ b/cv_analysis/figure_detection/text.py
@@ -1,6 +1,6 @@
 import cv2
 
-from cv_analysis.layout_parsing import normalize_to_gray_scale
+from cv_analysis.utils.common import normalize_to_gray_scale
 
 
 def remove_primary_text_regions(image):
diff --git a/cv_analysis/layout_parsing.py b/cv_analysis/layout_parsing.py
index d8e6ac6..85da4d4 100644
--- a/cv_analysis/layout_parsing.py
+++ b/cv_analysis/layout_parsing.py
@@ -1,12 +1,19 @@
-from functools import reduce, partial
+from functools import partial
 from typing import Iterable, List
 
 import cv2
 import numpy as np
 from funcy import compose, rcompose, lkeep
 
-from cv_analysis.utils.common import find_contours
-from cv_analysis.utils.conversion import box_to_rectangle, contour_to_rectangle
+from cv_analysis.utils.common import (
+    find_contours,
+    dilate_page_components,
+    normalize_to_gray_scale,
+    threshold_image,
+    invert_image,
+    fill_rectangles,
+)
+from cv_analysis.utils.conversion import contour_to_rectangle
 from cv_analysis.utils.merging import connect_related_rectangles
 from cv_analysis.utils.postprocessing import remove_included, has_no_parent
 from cv_analysis.utils.rectangle import Rectangle
@@ -47,23 +54,6 @@ def __find_segments(image: np.ndarray) -> List[Rectangle]:
     return rectangles
 
 
-def is_likely_segment(rectangle: Rectangle, min_area: float = 100) -> bool:
-    # FIXME: Parameterize via factory
-    return cv2.contourArea(rectangle, False) > min_area
-
-
-def dilate_page_components(image: np.ndarray) -> np.ndarray:
-    # FIXME: Parameterize via factory
-    image = cv2.GaussianBlur(image, (7, 7), 0)
-    # FIXME: Parameterize via factory
-    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
-    # FIXME: Parameterize via factory
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    # FIXME: Parameterize via factory
-    dilate = cv2.dilate(thresh, kernel, iterations=4)
-    return dilate
-
-
 def prepare_for_meta_detection(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
 
     image = fill_rectangles(image, rectangles)
@@ -74,29 +64,6 @@ def prepare_for_meta_detection(image: np.ndarray, rectangles: Iterable[Rectangle
     return image
 
 
-def normalize_to_gray_scale(image: np.ndarray) -> np.ndarray:
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
-    return image
-
-
-def threshold_image(image: np.ndarray) -> np.ndarray:
+def is_likely_segment(rectangle: Rectangle, min_area: float = 100) -> bool:
     # FIXME: Parameterize via factory
-    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
-    return image
-
-
-def invert_image(image: np.ndarray):
-    return ~image
-
-
-def fill_rectangles(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
-    image = reduce(fill_in_component_area, rectangles, image)
-    return image
-
-
-def fill_in_component_area(image: np.ndarray, rect: Rectangle) -> np.ndarray:
-    x, y, w, h = rect
-    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
-    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
-
-    return image
+    return cv2.contourArea(rectangle, False) > min_area
diff --git a/cv_analysis/utils/common.py b/cv_analysis/utils/common.py
index b7bc067..a77e27e 100644
--- a/cv_analysis/utils/common.py
+++ b/cv_analysis/utils/common.py
@@ -1,7 +1,51 @@
+from functools import reduce
+from typing import Iterable
+
 import cv2
+import numpy as np
 from funcy import first
 
+from cv_analysis.utils.rectangle import Rectangle
+
 
 def find_contours(image):
     contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     return contours, first(hierarchies) if hierarchies is not None else None
+
+
+def dilate_page_components(image: np.ndarray) -> np.ndarray:
+    # FIXME: Parameterize via factory
+    image = cv2.GaussianBlur(image, (7, 7), 0)
+    # FIXME: Parameterize via factory
+    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+    # FIXME: Parameterize via factory
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    # FIXME: Parameterize via factory
+    dilate = cv2.dilate(thresh, kernel, iterations=4)
+    return dilate
+
+
+def normalize_to_gray_scale(image: np.ndarray) -> np.ndarray:
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
+    return image
+
+
+def threshold_image(image: np.ndarray) -> np.ndarray:
+    # FIXME: Parameterize via factory
+    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
+    return image
+
+
+def invert_image(image: np.ndarray):
+    return ~image
+
+
+def fill_rectangles(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
+    image = reduce(fill_in_component_area, rectangles, image)
+    return image
+
+
+def fill_in_component_area(image: np.ndarray, rectangle: Rectangle) -> np.ndarray:
+    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (0, 0, 0), -1)
+    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (255, 255, 255), 7)
+    return image