from itertools import compress
from itertools import starmap
from operator import __and__

import cv2
import numpy as np
from pdf2image import pdf2image

from vidocp.utils.display import show_mpl
from vidocp.utils.draw import draw_rectangles
from vidocp.utils.post_processing import remove_overlapping, remove_included, has_no_parent


def is_likely_segment(rect, min_area=100):
    """Return True when the contour's area is strictly greater than *min_area*.

    Args:
        rect: A contour as accepted by ``cv2.contourArea``.
        min_area: Area threshold; contours at or below it are rejected.
    """
    return cv2.contourArea(rect, False) > min_area


def find_segments(image):
    """Find bounding rectangles of the sufficiently large contours in *image*.

    Args:
        image: Single-channel image passed straight to ``cv2.findContours``.

    Returns:
        A lazy iterator of ``cv2.boundingRect`` results (``x, y, w, h``), one
        per contour that passes both ``is_likely_segment`` and
        ``has_no_parent``. The iterator is empty when no contour is found.
    """
    contours, hierarchies = cv2.findContours(
        image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # OpenCV reports the hierarchy as None when it finds no contour at all
    # (e.g. a blank page); indexing it would raise TypeError.
    if hierarchies is None:
        return iter(())

    # Keep a contour only if it is large enough AND has_no_parent accepts its
    # hierarchy entry (presumably a top-level contour; see has_no_parent).
    mask1 = map(is_likely_segment, contours)
    mask2 = map(has_no_parent, hierarchies[0])
    mask = starmap(__and__, zip(mask1, mask2))
    contours = compress(contours, mask)

    rectangles = (cv2.boundingRect(c) for c in contours)

    return rectangles


def parse_layout(image: np.ndarray):
    """Detect layout segments of a page image as a list of rectangles.

    Detection runs in two passes: segments are first found on a blurred,
    Otsu-thresholded and dilated version of the page; each detection is then
    painted onto a copy of the page as a filled black box with a white border
    and segment detection is run again on that simplified image.

    Args:
        image: Page image, either single-channel or three-channel. It is not
            modified; all drawing happens on a copy.

    Returns:
        List of the rectangles that remain after ``remove_included`` and
        ``remove_overlapping`` post-processing.
    """
    image = image.copy()
    image_ = image.copy()

    if len(image_.shape) > 2:
        image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)

    # First pass: blur, binarize (inverted, so dark content becomes foreground)
    # and dilate so nearby content merges into blobs.
    image_ = cv2.GaussianBlur(image_, (7, 7), 0)
    thresh = cv2.threshold(image_, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    rects = list(find_segments(dilate))

    # -> Run meta detection on the previous detections
    # TODO: refactor
    for rect in rects:
        x, y, w, h = rect
        # Fill the detection in black, then outline it in white so that
        # adjacent boxes stay separated in the second pass.
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)

    # Only (near-)pure white survives the threshold; inverting turns everything
    # else into foreground for contour detection.
    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
    image = ~image

    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    rects = find_segments(image)
    # <- End of meta detection

    rects = remove_included(rects)
    rects = remove_overlapping(rects)

    return list(rects)


def annotate_layout_in_pdf(pdf_path, page_index=1, show=False):
    """Render one page of a PDF and draw its detected layout rectangles on it.

    Args:
        pdf_path: Path of the PDF, forwarded to ``pdf2image.convert_from_path``.
        page_index: Zero-based page index (converted to pdf2image's one-based
            ``first_page``/``last_page``). Note that the default of 1 selects
            the second page.
        show: When true, display the annotated page with ``show_mpl`` instead
            of returning it.

    Returns:
        The annotated page as returned by ``draw_rectangles`` when *show* is
        false; ``None`` when *show* is true.
    """
    page = pdf2image.convert_from_path(
        pdf_path, first_page=page_index + 1, last_page=page_index + 1
    )[0]
    page = np.array(page)

    rects = parse_layout(page)
    page = draw_rectangles(page, rects)

    if show:
        show_mpl(page)
        return None

    return page