cv-analysis-service/vidocp/layout_parsing.py
Matthias Bisping b82a294610 Pull request #7: Layout detection version 3
Merge in RR/vidocp from layout_detection_version_3 to master

Squashed commit of the following:

commit 262b1c14c0b8b164221d39fd286b20914d1a8e6a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Sat Feb 5 22:56:10 2022 +0100

    comment

commit 975dcdaae2b0e9bfcb075fe1c87adc48175c0d93
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Sat Feb 5 22:50:41 2022 +0100

    applied black

commit 49ba3b5f318a1b5d6bb39c0b53de5e237a87da96
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Sat Feb 5 22:48:44 2022 +0100

    improved layout parsing logic: filtering of included rects

commit d78ac24c10793f72b569c3c827834400b730888a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Sat Feb 5 22:36:49 2022 +0100

    improved layout parsing logic: filtering of overlaps, no sub-text regions
2022-02-05 22:58:51 +01:00

131 lines
3.5 KiB
Python

from collections import namedtuple
from functools import partial
from itertools import compress
from itertools import starmap
from operator import __and__
import cv2
import numpy as np
from pdf2image import pdf2image
from vidocp.utils import draw_rectangles, show_mpl
# Axis-aligned box stored as two corner points: (xmin, ymin) and (xmax, ymax).
Rectangle = namedtuple("Rectangle", ["xmin", "ymin", "xmax", "ymax"])
def make_box(x1, y1, x2, y2):
    """Bundle two corner points into a dict keyed by "x1", "y1", "x2", "y2"."""
    return {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
def compute_intersection(a, b):
    """Return the area shared by two boxes, or 0 when they do not intersect.

    Both arguments must expose ``xmin``, ``ymin``, ``xmax`` and ``ymax``
    attributes. Boxes that merely touch along an edge or corner yield 0.
    """
    width = min(a.xmax, b.xmax) - max(a.xmin, b.xmin)
    height = min(a.ymax, b.ymax) - max(a.ymin, b.ymin)

    # A negative extent on either axis means the boxes are disjoint there.
    intersects = (width >= 0) and (height >= 0)
    if not intersects:
        return 0
    return width * height
def is_likely_segment(rect, min_area=100):
    """Tell whether a contour is large enough to count as a segment.

    ``rect`` is handed to ``cv2.contourArea`` unchanged; the result is True
    only when that area is strictly greater than ``min_area``.
    """
    area = cv2.contourArea(rect, False)
    return area > min_area
def has_no_parent(hierarchy):
    """Tell whether a contour hierarchy entry has no parent contour.

    Args:
        hierarchy: One hierarchy row as produced by ``cv2.findContours``;
            its last element is the index of the parent contour.

    Returns:
        True when the parent index is negative, False otherwise.
    """
    # OpenCV marks a missing relation with a negative index. Index 0 is a
    # valid contour, so it must not be treated as "no parent".
    return hierarchy[-1] < 0
def xywh_to_vec_rect(rect):
    """Convert an ``(x, y, width, height)`` box into a corner-based ``Rectangle``."""
    left, top, width, height = rect
    return Rectangle(left, top, left + width, top + height)
def vec_rect_to_xywh(rect):
    """Convert an ``(xmin, ymin, xmax, ymax)`` box into an ``(x, y, width, height)`` tuple."""
    left, top, right, bottom = rect
    return left, top, right - left, bottom - top
def remove_overlapping(rectangles):
    """Drop every box that shares a positive area with a different box.

    ``rectangles`` is an iterable of ``(x, y, width, height)`` boxes. It is
    consumed immediately; the surviving boxes are returned as a lazy iterator
    of ``(x, y, width, height)`` tuples. All members of an overlapping group
    are removed, not just one of them.
    """
    boxes = [xywh_to_vec_rect(xywh) for xywh in rectangles]

    def is_isolated(box):
        # Boxes comparing equal to `box` (itself and exact duplicates) are skipped.
        return not any(compute_intersection(box, other) > 0 for other in boxes if other != box)

    survivors = (box for box in boxes if is_isolated(box))
    return map(vec_rect_to_xywh, survivors)
def remove_included(rectangles):
    """Drop every box that lies completely inside a different box.

    ``rectangles`` is an iterable of ``(x, y, width, height)`` boxes. It is
    consumed immediately; the surviving boxes are returned as a lazy iterator
    of ``(x, y, width, height)`` tuples. Shared edges count as inside.
    """
    boxes = [xywh_to_vec_rect(xywh) for xywh in rectangles]

    def contains(outer, inner):
        return (
            inner.xmin >= outer.xmin
            and inner.ymin >= outer.ymin
            and inner.xmax <= outer.xmax
            and inner.ymax <= outer.ymax
        )

    def is_nested(box):
        # Boxes comparing equal to `box` (itself and exact duplicates) are skipped.
        return any(contains(other, box) for other in boxes if other != box)

    survivors = (box for box in boxes if not is_nested(box))
    return map(vec_rect_to_xywh, survivors)
def find_segments(image):
    """Find the bounding boxes of segment-sized outer contours in an image.

    Args:
        image: Single-channel image passed to ``cv2.findContours`` with
            ``cv2.RETR_EXTERNAL`` and ``cv2.CHAIN_APPROX_SIMPLE``.

    Returns:
        A lazy generator of ``cv2.boundingRect`` results, one for each
        contour that passes both ``is_likely_segment`` and ``has_no_parent``.
        The generator is empty when no contour is found.
    """
    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # OpenCV returns None for the hierarchy when the image has no contours;
    # indexing it would raise a TypeError, so fall back to empty sequences.
    if hierarchies is None:
        contours, hierarchy_rows = (), ()
    else:
        hierarchy_rows = hierarchies[0]

    return (
        cv2.boundingRect(contour)
        for contour, hierarchy in zip(contours, hierarchy_rows)
        if is_likely_segment(contour) and has_no_parent(hierarchy)
    )
def parse_layout(image: np.ndarray):
    """Detect layout regions on a page image in two contour-detection passes.

    Args:
        image: Three-channel page image. It is converted with
            ``cv2.COLOR_BGR2GRAY``.
            NOTE(review): arrays built from PIL images are presumably RGB;
            confirm the channel order callers pass in.

    Returns:
        The iterator produced by ``remove_overlapping``: the detected regions
        as ``(x, y, width, height)`` boxes, with nested and mutually
        overlapping boxes removed. The input array is not modified.
    """
    # Work on a copy because the first-pass detections are drawn onto it.
    canvas = image.copy()

    # First pass: binarize (inverted, Otsu) and dilate before looking for contours.
    gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilated = cv2.dilate(binary, kernel, iterations=4)
    first_pass = list(find_segments(dilated))

    # -> Run meta detection on the previous detections TODO: refactor
    # Each detection is painted as a solid black box with a white outline of
    # thickness 7. Drawing order is kept, since later boxes paint over earlier ones.
    for x, y, w, h in first_pass:
        top_left, bottom_right = (x, y), (x + w, y + h)
        cv2.rectangle(canvas, top_left, bottom_right, (0, 0, 0), -1)
        cv2.rectangle(canvas, top_left, bottom_right, (255, 255, 255), 7)

    # Keep only channel values above 254, then invert so everything else
    # becomes foreground for the second contour search.
    _, canvas = cv2.threshold(canvas, 254, 255, cv2.THRESH_BINARY)
    canvas = ~canvas
    canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    rects = find_segments(canvas)
    # <- End of meta detection

    rects = remove_included(rects)
    rects = remove_overlapping(rects)
    return rects
def annotate_layout_in_pdf(pdf_path, page_index=1):
    """Render one PDF page, draw its detected layout boxes and display it.

    The page requested from pdf2image is number ``page_index + 1``, so the
    default ``page_index=1`` selects page number 2. Nothing is returned; the
    annotated page is handed to ``show_mpl``.
    """
    page_number = page_index + 1
    rendered = pdf2image.convert_from_path(pdf_path, first_page=page_number, last_page=page_number)
    page = np.array(rendered[0])

    annotated = draw_rectangles(page, parse_layout(page))
    show_mpl(annotated)