Pull request #6: added layout parsing logic

Merge in RR/vidocp from layout_detection_version_2 to master Squashed commit of the following: commit d443e95ad8143bed3efc74d9e38640498d8d16bf Author: Matthias Bisping <matthias.bisping@iqser.com> Date: Sat Feb 5 20:16:13 2022 +0100 readme updated commit 953ad696932454ce851544ed016f9e64bcc12080 Author: Matthias Bisping <matthias.bisping@iqser.com> Date: Sat Feb 5 20:14:59 2022 +0100 added layout parsing logic
2022-02-05 20:17:14 +01:00 · 2022-02-05 20:17:14 +01:00 · bb5707dc89
commit bb5707dc89
parent 00748a8ac0
6 changed files with 77 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -74,3 +74,15 @@ python scripts/annotate.py <path to pdf> 0 --type redaction
 The below image shows the detected redactions with green outlines.

 ![](data/redaction_detection.png)
+
+
+#### Layout Parsing
+
+The layout parsing utility detects elements such as paragraphs, tables and figures.
+```bash
+python scripts/annotate.py data/test_pdf.pdf 7 --type layout
+```
+
+The below image shows the detected layout elements on a page.
+
+![](data/layout_parsing.png)
--- a/data/layout_parsing.png
+++ b/data/layout_parsing.png
--- a/scripts/annotate.py
+++ b/scripts/annotate.py
@@ -2,7 +2,7 @@ import argparse

 from vidocp.table_parsing import annotate_tables_in_pdf
 from vidocp.redaction_detection import annotate_boxes_in_pdf
-from vidocp.layout_detection import annotate_layout_in_pdf
+from vidocp.layout_parsing import annotate_layout_in_pdf


 def parse_args():
--- a/vidocp/layout_parsing.py
+++ b/vidocp/layout_parsing.py
@@ -0,0 +1,38 @@
+import cv2
+import numpy as np
+from pdf2image import pdf2image
+
+from vidocp.utils import draw_rectangles, show_mpl
+
+
+def is_likely_segment(rect, min_area=1000):
+    return cv2.contourArea(rect, False) > min_area
+
+
+def parse_layout(image: np.array):
+
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    blur = cv2.GaussianBlur(gray, (7, 7), 0)
+    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    dilate = cv2.dilate(thresh, kernel, iterations=4)
+
+    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
+    cnts = filter(is_likely_segment, cnts)
+
+    rects = (cv2.boundingRect(c) for c in cnts)
+
+    return rects
+
+
+def annotate_layout_in_pdf(pdf_path, page_index=1):
+
+    page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
+    page = np.array(page)
+
+    rects = parse_layout(page)
+    page = draw_rectangles(page, rects)
+
+    show_mpl(page)
--- a/vidocp/redaction_detection.py
+++ b/vidocp/redaction_detection.py
@@ -35,7 +35,7 @@ def find_redactions(image: np.array, min_normalized_area=200000):
    blurred = cv2.GaussianBlur(gray, (5, 5), 1)
    thresh = cv2.threshold(blurred, 252, 255, cv2.THRESH_BINARY)[1]

-    contours, hierarchies = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
+    contours, hierarchies = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

    contours = map(
        first, starfilter(partial(is_likely_redaction, min_area=min_normalized_area), zip(contours, hierarchies[0]))
--- a/vidocp/utils.py
+++ b/vidocp/utils.py
@@ -16,9 +16,20 @@ def show_cv2(image):
    cv2.waitKey(0)


-def draw_contours(image, contours):
+def copy_and_normalize_channels(image):

    image = image.copy()
+    try:
+        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+    except cv2.error:
+        pass
+
+    return image
+
+
+def draw_contours(image, contours):
+
+    image = copy_and_normalize_channels(image)

    for cont in contours:
        cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
@@ -26,9 +37,21 @@ def draw_contours(image, contours):
    return image


+def draw_rectangles(image, rectangles):
+
+    image = copy_and_normalize_channels(image)
+
+    for rect in rectangles:
+        x, y, w, h = rect
+        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
+
+    return image
+
+
 def draw_stats(image, stats, annotate=False):

-    image = image.copy()
+    image = copy_and_normalize_channels(image)
+
    keys = ["x", "y", "w", "h"]

    def annotate_stat(x, y, w, h):