diff --git a/README.md b/README.md index f2f2fe2..5db6805 100644 --- a/README.md +++ b/README.md @@ -74,3 +74,15 @@ python scripts/annotate.py 0 --type redaction The below image shows the detected redactions with green outlines. ![](data/redaction_detection.png) + + +#### Layout Parsing + +The layout parsing utility detects elements such as paragraphs, tables and figures. +```bash +python scripts/annotate.py data/test_pdf.pdf 7 --type layout +``` + +The below image shows the detected layout elements on a page. + +![](data/layout_parsing.png) diff --git a/data/layout_parsing.png b/data/layout_parsing.png new file mode 100644 index 0000000..6b2a12a Binary files /dev/null and b/data/layout_parsing.png differ diff --git a/scripts/annotate.py b/scripts/annotate.py index 4ada4ad..d690988 100644 --- a/scripts/annotate.py +++ b/scripts/annotate.py @@ -2,7 +2,7 @@ import argparse from vidocp.table_parsing import annotate_tables_in_pdf from vidocp.redaction_detection import annotate_boxes_in_pdf -from vidocp.layout_detection import annotate_layout_in_pdf +from vidocp.layout_parsing import annotate_layout_in_pdf def parse_args(): diff --git a/vidocp/layout_parsing.py b/vidocp/layout_parsing.py new file mode 100644 index 0000000..bc42d83 --- /dev/null +++ b/vidocp/layout_parsing.py @@ -0,0 +1,38 @@ +import cv2 +import numpy as np +from pdf2image import pdf2image + +from vidocp.utils import draw_rectangles, show_mpl + + +def is_likely_segment(rect, min_area=1000): + return cv2.contourArea(rect, False) > min_area + + +def parse_layout(image: np.array): + + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + blur = cv2.GaussianBlur(gray, (7, 7), 0) + thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] + + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) + dilate = cv2.dilate(thresh, kernel, iterations=4) + + cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + cnts = cnts[0] if len(cnts) == 2 else cnts[1] + cnts = filter(is_likely_segment, cnts) + + rects = (cv2.boundingRect(c) for c in cnts) + + return rects + + +def annotate_layout_in_pdf(pdf_path, page_index=1): + + page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0] + page = np.array(page) + + rects = parse_layout(page) + page = draw_rectangles(page, rects) + + show_mpl(page) diff --git a/vidocp/redaction_detection.py b/vidocp/redaction_detection.py index e5908e3..f1b319a 100644 --- a/vidocp/redaction_detection.py +++ b/vidocp/redaction_detection.py @@ -35,7 +35,7 @@ def find_redactions(image: np.array, min_normalized_area=200000): blurred = cv2.GaussianBlur(gray, (5, 5), 1) thresh = cv2.threshold(blurred, 252, 255, cv2.THRESH_BINARY)[1] - contours, hierarchies = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) + contours, hierarchies = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) contours = map( first, starfilter(partial(is_likely_redaction, min_area=min_normalized_area), zip(contours, hierarchies[0])) diff --git a/vidocp/utils.py b/vidocp/utils.py index b540356..edff67c 100644 --- a/vidocp/utils.py +++ b/vidocp/utils.py @@ -16,9 +16,20 @@ def show_cv2(image): cv2.waitKey(0) -def draw_contours(image, contours): +def copy_and_normalize_channels(image): image = image.copy() + try: + image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) + except cv2.error: + pass + + return image + + +def draw_contours(image, contours): + + image = copy_and_normalize_channels(image) for cont in contours: cv2.drawContours(image, cont, -1, (0, 255, 0), 4) @@ -26,9 +37,21 @@ def draw_contours(image, contours): return image +def draw_rectangles(image, rectangles): + + image = copy_and_normalize_channels(image) + + for rect in rectangles: + x, y, w, h = rect + cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2) + + return image + + def draw_stats(image, stats, annotate=False): - image = image.copy() + image = copy_and_normalize_channels(image) + keys = ["x", "y", "w", "h"] def annotate_stat(x, y, w, h):