diff --git a/vidocp/figure_detection.py b/vidocp/figure_detection.py index b5fd38b..830fc97 100644 --- a/vidocp/figure_detection.py +++ b/vidocp/figure_detection.py @@ -2,7 +2,8 @@ import cv2 import numpy as np from pdf2image import pdf2image -from vidocp.utils import show_mpl, draw_rectangles, remove_included +from vidocp.utils import show_mpl, draw_rectangles, remove_included, remove_primary_text_regions, \ + __detect_large_coherent_structures def is_large_enough(cont, min_area=10000): @@ -18,67 +19,6 @@ def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6): return is_large_enough(cont, min_area) and has_acceptable_format(cont, max_width_to_hight_ratio) -def remove_primary_text_regions(image): - """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs. - - Args: - image: Image to remove primary text from. - - Returns: - Image with primary text removed. - - References: - https://stackoverflow.com/questions/58349726/opencv-how-to-remove-text-from-background - """ - - def filter_likely_primary_text_segments(cnts): - for c in cnts: - area = cv2.contourArea(c) - if 800 < area < 15000: - yield cv2.boundingRect(c) - - image = image.copy() - - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - - thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] - - close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 3)) - close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, close_kernel, iterations=1) - - dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3)) - dilate = cv2.dilate(close, dilate_kernel, iterations=1) - - cnts, _ = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) - - for rect in filter_likely_primary_text_segments(cnts): - x, y, w, h = rect - cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1) - - return image - - -def __detect_large_coherent_structures(image: np.array): - """Detects large coherent structures on an image. - - References: - https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection - """ - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - - thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY)[1] - - dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5)) - dilate = cv2.dilate(~thresh, dilate_kernel, iterations=4) - - close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20)) - close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1) - - cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - - return cnts - - def detect_figures(image: np.array): image = image.copy() diff --git a/vidocp/utils.py b/vidocp/utils.py index ee528b4..8b9235f 100644 --- a/vidocp/utils.py +++ b/vidocp/utils.py @@ -2,6 +2,7 @@ from collections import namedtuple from functools import partial import cv2 +import numpy as np from matplotlib import pyplot as plt @@ -141,3 +142,64 @@ def vec_rect_to_xywh(rect): w = x2 - x h = y2 - y return x, y, w, h + + +def remove_primary_text_regions(image): + """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs. + + Args: + image: Image to remove primary text from. + + Returns: + Image with primary text removed. + + References: + https://stackoverflow.com/questions/58349726/opencv-how-to-remove-text-from-background + """ + + def filter_likely_primary_text_segments(cnts): + for c in cnts: + area = cv2.contourArea(c) + if 800 < area < 15000: + yield cv2.boundingRect(c) + + image = image.copy() + + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] + + close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 3)) + close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, close_kernel, iterations=1) + + dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3)) + dilate = cv2.dilate(close, dilate_kernel, iterations=1) + + cnts, _ = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + + for rect in filter_likely_primary_text_segments(cnts): + x, y, w, h = rect + cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1) + + return image + + +def __detect_large_coherent_structures(image: np.array): + """Detects large coherent structures on an image. + + References: + https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection + """ + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY)[1] + + dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5)) + dilate = cv2.dilate(~thresh, dilate_kernel, iterations=4) + + close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20)) + close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1) + + cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + return cnts \ No newline at end of file