Merge in RR/vidocp from layout_detection_version_3 to master
Squashed commit of the following:
commit 262b1c14c0b8b164221d39fd286b20914d1a8e6a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Sat Feb 5 22:56:10 2022 +0100
comment
commit 975dcdaae2b0e9bfcb075fe1c87adc48175c0d93
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Sat Feb 5 22:50:41 2022 +0100
applied black
commit 49ba3b5f318a1b5d6bb39c0b53de5e237a87da96
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Sat Feb 5 22:48:44 2022 +0100
improved layout parsing logic: filtering of included rects
commit d78ac24c10793f72b569c3c827834400b730888a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Sat Feb 5 22:36:49 2022 +0100
improved layout parsing logic: filtering of overlaps, no sub-text regions
131 lines
3.5 KiB
Python
131 lines
3.5 KiB
Python
from collections import namedtuple
|
|
from functools import partial
|
|
from itertools import compress
|
|
from itertools import starmap
|
|
from operator import __and__
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from pdf2image import pdf2image
|
|
|
|
from vidocp.utils import draw_rectangles, show_mpl
|
|
|
|
# Axis-aligned rectangle stored as its two opposite corners (min corner, max corner).
Rectangle = namedtuple("Rectangle", ["xmin", "ymin", "xmax", "ymax"])
|
|
|
|
|
|
def make_box(x1, y1, x2, y2):
    """Bundle four coordinates into a dict keyed by "x1", "y1", "x2" and "y2"."""
    return {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
|
|
|
|
|
|
def compute_intersection(a, b):
    """Return the area shared by the rectangles ``a`` and ``b``.

    Both arguments must expose ``xmin``, ``ymin``, ``xmax`` and ``ymax``
    attributes. Rectangles that are apart yield 0, and so do rectangles that
    only touch along an edge or corner (the shared extent is then zero).
    """
    overlap_width = min(a.xmax, b.xmax) - max(a.xmin, b.xmin)
    overlap_height = min(a.ymax, b.ymax) - max(a.ymin, b.ymin)

    # Only when both extents are non-negative do the rectangles actually meet.
    if overlap_width >= 0 and overlap_height >= 0:
        return overlap_width * overlap_height
    return 0
|
|
|
|
|
|
def is_likely_segment(rect, min_area=100):
    """Tell whether ``rect`` covers strictly more area than ``min_area``.

    ``rect`` is handed to ``cv2.contourArea`` unchanged, so it must be a point
    set that OpenCV accepts as a contour (the caller in this file passes the
    contours returned by ``cv2.findContours``).
    """
    # Second argument False requests the non-oriented area.
    area = cv2.contourArea(rect, False)
    return area > min_area
|
|
|
|
|
|
def has_no_parent(hierarchy):
    """Tell whether a contour is top-level, judging by its hierarchy entry.

    Args:
        hierarchy: The hierarchy entry of a single contour as produced by
            ``cv2.findContours``; its last element is the index of the
            parent contour.

    Returns:
        True if the parent index is negative, i.e. the contour has no parent.
    """
    # OpenCV signals "no parent" with a negative index. Index 0 refers to the
    # first contour and is therefore a perfectly valid parent.
    return hierarchy[-1] < 0
|
|
|
|
|
|
def xywh_to_vec_rect(rect):
    """Convert an ``(x, y, width, height)`` box into a corner-based ``Rectangle``."""
    left, top, width, height = rect
    return Rectangle(xmin=left, ymin=top, xmax=left + width, ymax=top + height)
|
|
|
|
|
|
def vec_rect_to_xywh(rect):
    """Convert an ``(xmin, ymin, xmax, ymax)`` rectangle into an ``(x, y, width, height)`` tuple."""
    left, top, right, bottom = rect
    return left, top, right - left, bottom - top
|
|
|
|
|
|
def remove_overlapping(rectangles):
    """Drop every rectangle that shares a positive area with a different rectangle.

    ``rectangles`` is an iterable of ``(x, y, w, h)`` boxes; it is consumed
    eagerly. All members of an overlapping group are dropped, not just one of
    them. Rectangles that compare equal are never tested against each other,
    so exact duplicates do not eliminate one another.

    Returns a lazy iterator over the surviving boxes in ``(x, y, w, h)`` form.
    """
    boxes = [xywh_to_vec_rect(box) for box in rectangles]

    def stands_alone(candidate):
        for other in boxes:
            if candidate == other:
                # Same coordinates: skip the comparison with itself / its duplicates.
                continue
            if compute_intersection(candidate, other) > 0:
                return False
        return True

    return map(vec_rect_to_xywh, filter(stands_alone, boxes))
|
|
|
|
|
|
def remove_included(rectangles):
    """Drop every rectangle that lies entirely within a different rectangle.

    ``rectangles`` is an iterable of ``(x, y, w, h)`` boxes; it is consumed
    eagerly. Containment is inclusive, so sharing an edge with the enclosing
    rectangle still counts as being included. Rectangles that compare equal
    are never tested against each other, so exact duplicates are all kept.

    Returns a lazy iterator over the surviving boxes in ``(x, y, w, h)`` form.
    """
    boxes = [xywh_to_vec_rect(box) for box in rectangles]

    def encloses(outer, inner):
        return (
            inner.xmin >= outer.xmin
            and inner.ymin >= outer.ymin
            and inner.xmax <= outer.xmax
            and inner.ymax <= outer.ymax
        )

    def is_outermost(candidate):
        for other in boxes:
            if candidate == other:
                # Same coordinates: skip the comparison with itself / its duplicates.
                continue
            if encloses(other, candidate):
                return False
        return True

    return map(vec_rect_to_xywh, filter(is_outermost, boxes))
|
|
|
|
|
|
def find_segments(image):
    """Find the bounding boxes of the outer contours in ``image``.

    Args:
        image: Image handed to ``cv2.findContours`` (the callers in this file
            pass single-channel images).

    Returns:
        A lazy iterator of ``(x, y, w, h)`` tuples as produced by
        ``cv2.boundingRect``, one for each contour accepted by both
        ``is_likely_segment`` and ``has_no_parent``. The iterator is empty
        when no contour is found.
    """
    # Taking the last two items also copes with OpenCV 3.x, which returns
    # (image, contours, hierarchy) instead of (contours, hierarchy).
    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # Without any contour OpenCV returns None in place of the hierarchy array,
    # so indexing it below would raise a TypeError.
    if hierarchies is None:
        return iter(())

    large_enough = map(is_likely_segment, contours)
    top_level = map(has_no_parent, hierarchies[0])
    keep = starmap(__and__, zip(large_enough, top_level))
    selected = compress(contours, keep)

    return (cv2.boundingRect(contour) for contour in selected)
|
|
|
|
|
|
def parse_layout(image: np.ndarray):
    """Detect layout segments on a page image.

    Detection runs in two passes. The first pass finds segments on a blurred,
    Otsu-thresholded and dilated version of the page. The second pass ("meta
    detection") paints every first-pass box black with a white frame onto a
    copy of the page, binarizes and inverts the result, and runs the segment
    search again. Finally, boxes included in other boxes and boxes overlapping
    other boxes are removed.

    Args:
        image: Three-channel page image. It is not modified; all drawing
            happens on a copy.
            NOTE(review): the conversion uses COLOR_BGR2GRAY, i.e. BGR channel
            order is assumed, while ``annotate_layout_in_pdf`` passes a page
            rendered by pdf2image - confirm the intended channel order.

    Returns:
        A lazy iterator of ``(x, y, w, h)`` boxes.
    """
    canvas = image.copy()

    gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilated = cv2.dilate(binary, kernel, iterations=4)

    # Materialize the first pass before drawing on the canvas below.
    first_pass = list(find_segments(dilated))

    # -> Run meta detection on the previous detections TODO: refactor
    for x, y, w, h in first_pass:
        top_left, bottom_right = (x, y), (x + w, y + h)
        # Filled black box first, then a 7 px white frame around it.
        cv2.rectangle(canvas, top_left, bottom_right, (0, 0, 0), -1)
        cv2.rectangle(canvas, top_left, bottom_right, (255, 255, 255), 7)

    # Only values above 254 stay white; after inversion they are the background.
    _, canvas = cv2.threshold(canvas, 254, 255, cv2.THRESH_BINARY)
    canvas = ~canvas

    canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    rects = find_segments(canvas)
    # <- End of meta detection

    rects = remove_included(rects)
    rects = remove_overlapping(rects)

    return rects
|
|
|
|
|
|
def annotate_layout_in_pdf(pdf_path, page_index=1):
    """Render one page of a PDF, detect its layout and display the annotated page.

    The page requested from pdf2image is ``page_index + 1`` (used for both
    ``first_page`` and ``last_page``).
    NOTE(review): with the default ``page_index=1`` this requests page 2 -
    confirm that this default is intended.
    """
    page_number = page_index + 1
    pages = pdf2image.convert_from_path(pdf_path, first_page=page_number, last_page=page_number)
    page = np.array(pages[0])

    rects = parse_layout(page)
    annotated = draw_rectangles(page, rects)

    show_mpl(annotated)
|