refactoring

This commit is contained in:
Matthias Bisping 2022-02-06 14:31:21 +01:00
parent 98d77cb522
commit aa66b6865b
4 changed files with 25 additions and 29 deletions

View File

@ -3,16 +3,7 @@ import numpy as np
from pdf2image import pdf2image
from vidocp.utils import show_mpl, draw_rectangles, remove_included, remove_primary_text_regions, \
__detect_large_coherent_structures
def is_large_enough(cont, min_area=10000):
    """Return whether the contour's area is strictly greater than ``min_area``.

    Args:
        cont: Contour handed to ``cv2.contourArea``.
        min_area: Exclusive lower bound on the contour area.
    """
    # oriented=False: ask OpenCV for the absolute (unsigned) area.
    area = cv2.contourArea(cont, False)
    return area > min_area
def has_acceptable_format(cont, max_width_to_hight_ratio=6):
    """Return whether the contour's bounding box is neither too wide nor too tall.

    The width/height ratio of the bounding rectangle must lie in the
    inclusive range ``[1 / max_width_to_hight_ratio, max_width_to_hight_ratio]``.

    Args:
        cont: Contour handed to ``cv2.boundingRect``.
        max_width_to_hight_ratio: Largest accepted width/height ratio; its
            reciprocal is the smallest accepted ratio.
    """
    _, _, width, height = cv2.boundingRect(cont)
    aspect_ratio = width / height
    # The lower bound is only computed once the upper bound has passed.
    return (
        max_width_to_hight_ratio >= aspect_ratio
        and aspect_ratio >= 1 / max_width_to_hight_ratio
    )
__detect_large_coherent_structures, is_large_enough, has_acceptable_format
def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6):

View File

@ -5,22 +5,7 @@ import numpy as np
import pdf2image
from iteration_utilities import starfilter, first
from vidocp.utils import show_mpl, draw_contours
def is_filled(hierarchy):
    """Return whether a hierarchy entry describes a filled contour.

    An entry counts as filled when its element at index 3 is ``<= 0`` and its
    element at index 2 equals ``-1``.
    See https://stackoverflow.com/questions/60095520/how-to-distinguish-filled-circle-contour-and-unfilled-circle-contour-in-opencv

    NOTE(review): presumably ``hierarchy`` is one OpenCV hierarchy row
    ``[next, previous, first_child, parent]`` -- confirm against the caller.
    """
    parent_index = hierarchy[3]
    return parent_index <= 0 and hierarchy[2] == -1
def is_boxy(contour):
    """Return whether the contour simplifies to a polygon with at most 10 vertices.

    The contour is approximated with ``cv2.approxPolyDP`` using a tolerance of
    1% of its perimeter; few remaining vertices indicate a box-like outline.
    """
    # Both calls pass closed=True, i.e. the contour is treated as a closed curve.
    perimeter = cv2.arcLength(contour, True)
    tolerance = 0.01 * perimeter
    polygon = cv2.approxPolyDP(contour, tolerance, True)
    return len(polygon) <= 10
def is_large_enough(contour, min_area):
    """Return whether the contour's area is strictly greater than ``min_area``.

    Args:
        contour: Contour handed to ``cv2.contourArea``.
        min_area: Exclusive lower bound on the contour area.
    """
    # oriented=False: ask OpenCV for the absolute (unsigned) area.
    area = cv2.contourArea(contour, False)
    return area > min_area
from vidocp.utils import show_mpl, draw_contours, is_large_enough, is_filled, is_boxy
def is_likely_redaction(contour, hierarchy, min_area):

View File

@ -2,7 +2,7 @@ import cv2
import numpy as np
from pdf2image import pdf2image
from vidocp.utils import show_cv2, draw_stats
from vidocp.utils import draw_stats, show_mpl
def add_external_contours(image, img):
@ -52,4 +52,4 @@ def annotate_tables_in_pdf(pdf_path, page_index=1):
stats = parse_table(page)
page = draw_stats(page, stats)
show_cv2(page)
show_mpl(page)

View File

@ -202,4 +202,24 @@ def __detect_large_coherent_structures(image: np.array):
cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
return cnts
return cnts
def is_large_enough(cont, min_area):
    """Tell whether a contour covers more area than ``min_area``.

    Args:
        cont: Contour handed to ``cv2.contourArea``.
        min_area: Threshold the area must strictly exceed.

    Returns:
        ``True`` if the unsigned contour area is greater than ``min_area``.
    """
    # The second argument is OpenCV's ``oriented`` flag; False yields the
    # absolute area regardless of contour orientation.
    contour_area = cv2.contourArea(cont, False)
    return contour_area > min_area
def has_acceptable_format(cont, max_width_to_hight_ratio):
    """Tell whether a contour's bounding box has a moderate aspect ratio.

    Args:
        cont: Contour handed to ``cv2.boundingRect``.
        max_width_to_hight_ratio: Largest accepted width/height ratio; the
            smallest accepted ratio is its reciprocal.

    Returns:
        ``True`` if width/height lies within
        ``[1 / max_width_to_hight_ratio, max_width_to_hight_ratio]`` (inclusive).
    """
    _, _, box_width, box_height = cv2.boundingRect(cont)
    ratio = box_width / box_height
    # Check the upper bound first; the reciprocal is only evaluated afterwards.
    return (
        max_width_to_hight_ratio >= ratio
        and ratio >= 1 / max_width_to_hight_ratio
    )
def is_filled(hierarchy):
    """Tell whether a hierarchy entry belongs to a filled contour.

    The entry qualifies when ``hierarchy[3] <= 0`` and ``hierarchy[2] == -1``.
    See https://stackoverflow.com/questions/60095520/how-to-distinguish-filled-circle-contour-and-unfilled-circle-contour-in-opencv

    NOTE(review): presumably ``hierarchy`` is a single OpenCV hierarchy row
    ``[next, previous, first_child, parent]`` -- verify against the caller.
    """
    parent_slot = hierarchy[3]
    return parent_slot <= 0 and hierarchy[2] == -1
def is_boxy(contour):
    """Tell whether a contour can be approximated by a polygon of at most 10 vertices.

    Args:
        contour: Contour handed to ``cv2.arcLength`` and ``cv2.approxPolyDP``.

    Returns:
        ``True`` if the polygon approximation, computed with a tolerance of
        1% of the contour's perimeter, has 10 or fewer vertices.
    """
    # closed=True in both calls: the contour is handled as a closed curve.
    closed_perimeter = cv2.arcLength(contour, True)
    simplified = cv2.approxPolyDP(contour, 0.01 * closed_perimeter, True)
    return len(simplified) <= 10