diff --git a/vidocp/figure_detection.py b/vidocp/figure_detection.py index 830fc97..5cb44ca 100644 --- a/vidocp/figure_detection.py +++ b/vidocp/figure_detection.py @@ -3,16 +3,7 @@ import numpy as np from pdf2image import pdf2image from vidocp.utils import show_mpl, draw_rectangles, remove_included, remove_primary_text_regions, \ - __detect_large_coherent_structures - - -def is_large_enough(cont, min_area=10000): - return cv2.contourArea(cont, False) > min_area - - -def has_acceptable_format(cont, max_width_to_hight_ratio=6): - _, _, w, h = cv2.boundingRect(cont) - return max_width_to_hight_ratio >= w / h >= (1 / max_width_to_hight_ratio) + __detect_large_coherent_structures, is_large_enough, has_acceptable_format def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6): diff --git a/vidocp/redaction_detection.py b/vidocp/redaction_detection.py index f1b319a..31cb3b1 100644 --- a/vidocp/redaction_detection.py +++ b/vidocp/redaction_detection.py @@ -5,22 +5,7 @@ import numpy as np import pdf2image from iteration_utilities import starfilter, first -from vidocp.utils import show_mpl, draw_contours - - -def is_filled(hierarchy): - # See https://stackoverflow.com/questions/60095520/how-to-distinguish-filled-circle-contour-and-unfilled-circle-contour-in-opencv - return hierarchy[3] <= 0 and hierarchy[2] == -1 - - -def is_boxy(contour): - epsilon = 0.01 * cv2.arcLength(contour, True) - approx = cv2.approxPolyDP(contour, epsilon, True) - return len(approx) <= 10 - - -def is_large_enough(contour, min_area): - return cv2.contourArea(contour, False) > min_area +from vidocp.utils import show_mpl, draw_contours, is_large_enough, is_filled, is_boxy def is_likely_redaction(contour, hierarchy, min_area): diff --git a/vidocp/table_parsing.py b/vidocp/table_parsing.py index 765fb1c..035f569 100644 --- a/vidocp/table_parsing.py +++ b/vidocp/table_parsing.py @@ -2,7 +2,7 @@ import cv2 import numpy as np from pdf2image import pdf2image -from vidocp.utils import show_cv2, draw_stats +from vidocp.utils import draw_stats, show_mpl def add_external_contours(image, img): @@ -52,4 +52,4 @@ def annotate_tables_in_pdf(pdf_path, page_index=1): stats = parse_table(page) page = draw_stats(page, stats) - show_cv2(page) + show_mpl(page) diff --git a/vidocp/utils.py b/vidocp/utils.py index 8b9235f..752cd3f 100644 --- a/vidocp/utils.py +++ b/vidocp/utils.py @@ -202,4 +202,24 @@ def __detect_large_coherent_structures(image: np.array): cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - return cnts \ No newline at end of file + return cnts + + +def is_large_enough(cont, min_area): + return cv2.contourArea(cont, False) > min_area + + +def has_acceptable_format(cont, max_width_to_hight_ratio): + _, _, w, h = cv2.boundingRect(cont) + return max_width_to_hight_ratio >= w / h >= (1 / max_width_to_hight_ratio) + + +def is_filled(hierarchy): + # See https://stackoverflow.com/questions/60095520/how-to-distinguish-filled-circle-contour-and-unfilled-circle-contour-in-opencv + return hierarchy[3] <= 0 and hierarchy[2] == -1 + + +def is_boxy(contour): + epsilon = 0.01 * cv2.arcLength(contour, True) + approx = cv2.approxPolyDP(contour, epsilon, True) + return len(approx) <= 10 \ No newline at end of file