Pull request #6: added layout parsing logic

Merge in RR/vidocp from layout_detection_version_2 to master

Squashed commit of the following:

commit d443e95ad8143bed3efc74d9e38640498d8d16bf
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Sat Feb 5 20:16:13 2022 +0100

    readme updated

commit 953ad696932454ce851544ed016f9e64bcc12080
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Sat Feb 5 20:14:59 2022 +0100

    added layout parsing logic
This commit is contained in:
Matthias Bisping 2022-02-05 20:17:14 +01:00
parent 00748a8ac0
commit bb5707dc89
6 changed files with 77 additions and 4 deletions

View File

@ -74,3 +74,15 @@ python scripts/annotate.py <path to pdf> 0 --type redaction
The image below shows the detected redactions with green outlines.
![](data/redaction_detection.png)
#### Layout Parsing
The layout parsing utility detects elements such as paragraphs, tables and figures.
```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type layout
```
The image below shows the layout elements detected on a page.
![](data/layout_parsing.png)

BIN
data/layout_parsing.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 568 KiB

View File

@ -2,7 +2,7 @@ import argparse
from vidocp.table_parsing import annotate_tables_in_pdf
from vidocp.redaction_detection import annotate_boxes_in_pdf
from vidocp.layout_detection import annotate_layout_in_pdf
from vidocp.layout_parsing import annotate_layout_in_pdf
def parse_args():

38
vidocp/layout_parsing.py Normal file
View File

@ -0,0 +1,38 @@
import cv2
import numpy as np
from pdf2image import pdf2image
from vidocp.utils import draw_rectangles, show_mpl
def is_likely_segment(rect, min_area=1000):
    """Tell whether a contour is large enough to count as a layout segment.

    Args:
        rect: Contour (point array) as accepted by ``cv2.contourArea``.
        min_area: Area threshold; the contour's area must be strictly
            greater than this value.

    Returns:
        True if the (unsigned) contour area exceeds ``min_area``.
    """
    # oriented=False -> the absolute (non-signed) area is computed.
    area = cv2.contourArea(rect, False)
    return area > min_area
def parse_layout(image: np.ndarray) -> list:
    """Detect bounding boxes of layout segments on a page image.

    The image is converted to grayscale, blurred, binarized with an inverted
    Otsu threshold and dilated, so that nearby foreground pixels fuse into
    blobs. The outer contours of those blobs are filtered with
    ``is_likely_segment`` and reduced to their bounding rectangles.

    Args:
        image: Three-channel page image. The grayscale conversion uses
            ``cv2.COLOR_BGR2GRAY``, i.e. it treats the channels as BGR.

    Returns:
        A list of ``(x, y, w, h)`` bounding rectangles, one per contour that
        passes ``is_likely_segment``. A list (rather than a lazy generator)
        is returned so the result can be iterated more than once.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (7, 7), 0)
    # With THRESH_OTSU the threshold value is computed from the image, so the
    # 0 passed here is ignored; THRESH_BINARY_INV makes dark pixels foreground.
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others;
    # pick the element holding the contours in either case.
    contours = found[0] if len(found) == 2 else found[1]

    return [cv2.boundingRect(c) for c in contours if is_likely_segment(c)]
def annotate_layout_in_pdf(pdf_path, page_index=1):
    """Render one PDF page, outline its detected layout segments and show it.

    Args:
        pdf_path: Path of the PDF file to read.
        page_index: Zero-based index of the page to annotate (converted to
            the one-based page number passed to ``convert_from_path``).

    Raises:
        IndexError: If the conversion yields no image for the requested page,
            e.g. because ``page_index`` is outside the document.
    """
    page_number = page_index + 1
    pages = pdf2image.convert_from_path(pdf_path, first_page=page_number, last_page=page_number)
    if not pages:
        # Same exception type the bare ``[0]`` lookup would raise, but with a
        # message that names the actual problem.
        raise IndexError(f"No page with index {page_index} could be rendered from {pdf_path!r}")

    # NOTE(review): the rendered page is presumably RGB while parse_layout
    # converts with COLOR_BGR2GRAY - confirm the channel order is acceptable.
    page = np.array(pages[0])
    rects = parse_layout(page)
    page = draw_rectangles(page, rects)
    show_mpl(page)

View File

@ -35,7 +35,7 @@ def find_redactions(image: np.array, min_normalized_area=200000):
blurred = cv2.GaussianBlur(gray, (5, 5), 1)
thresh = cv2.threshold(blurred, 252, 255, cv2.THRESH_BINARY)[1]
contours, hierarchies = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
contours, hierarchies = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
contours = map(
first, starfilter(partial(is_likely_redaction, min_area=min_normalized_area), zip(contours, hierarchies[0]))

View File

@ -16,9 +16,20 @@ def show_cv2(image):
cv2.waitKey(0)
def draw_contours(image, contours):
def copy_and_normalize_channels(image):
    """Return a copy of ``image``, expanded from grayscale to BGR if possible.

    The input is never modified. If the GRAY2BGR conversion is rejected by
    OpenCV (``cv2.error``), the plain copy is returned unchanged.
    """
    duplicate = image.copy()
    try:
        return cv2.cvtColor(duplicate, cv2.COLOR_GRAY2BGR)
    except cv2.error:
        # Conversion not applicable to this image; hand back the copy as-is.
        return duplicate
def draw_contours(image, contours):
image = copy_and_normalize_channels(image)
for cont in contours:
cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
@ -26,9 +37,21 @@ def draw_contours(image, contours):
return image
def draw_rectangles(image, rectangles):
    """Draw green outlines for the given rectangles on a copy of ``image``.

    Args:
        image: Image to annotate; it is copied (and channel-normalized via
            ``copy_and_normalize_channels``) before drawing.
        rectangles: Iterable of ``(x, y, w, h)`` tuples.

    Returns:
        The annotated copy of the image.
    """
    canvas = copy_and_normalize_channels(image)
    outline_color, thickness = (0, 255, 0), 2

    for left, top, width, height in rectangles:
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        cv2.rectangle(canvas, top_left, bottom_right, outline_color, thickness)

    return canvas
def draw_stats(image, stats, annotate=False):
image = image.copy()
image = copy_and_normalize_channels(image)
keys = ["x", "y", "w", "h"]
def annotate_stat(x, y, w, h):