# cv-analysis-service/vidocp/layout_parsing.py

from collections import namedtuple
from functools import partial
from itertools import compress
from itertools import starmap
from operator import __and__

import cv2
import numpy as np
from pdf2image import pdf2image

from vidocp.utils import draw_rectangles, show_mpl

# Axis-aligned rectangle described by its two opposite corners.
Rectangle = namedtuple("Rectangle", ("xmin", "ymin", "xmax", "ymax"))


def make_box(x1, y1, x2, y2):
    """Bundle two corner points into a dict with keys ``x1``, ``y1``, ``x2``, ``y2``."""
    return {"x1": x1, "y1": y1, "x2": x2, "y2": y2}


def compute_intersection(a, b):
    """Return the area shared by two axis-aligned rectangles.

    Args:
        a: Object exposing ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
        b: Object exposing ``xmin``, ``ymin``, ``xmax`` and ``ymax``.

    Returns:
        The area of the overlap region, or 0 when the rectangles are disjoint.
        Rectangles that merely touch along an edge or corner also yield 0.
    """
    overlap_width = min(a.xmax, b.xmax) - max(a.xmin, b.xmin)
    overlap_height = min(a.ymax, b.ymax) - max(a.ymin, b.ymin)

    # Both extents must be non-negative; otherwise the product of two negative
    # extents would wrongly report a positive area for disjoint rectangles.
    if overlap_width >= 0 and overlap_height >= 0:
        return overlap_width * overlap_height
    return 0


def is_likely_segment(rect, min_area=100):
    """Tell whether a contour is large enough to count as a segment.

    Args:
        rect: Contour passed straight to ``cv2.contourArea``.
        min_area: Area that the contour must strictly exceed.

    Returns:
        True when the (unoriented) contour area is greater than ``min_area``.
    """
    area = cv2.contourArea(rect, False)
    return area > min_area


def has_no_parent(hierarchy):
    """Tell whether a contour hierarchy entry describes a top-level contour.

    Args:
        hierarchy: One hierarchy entry as produced by ``cv2.findContours``;
            its last element is the index of the parent contour.

    Returns:
        True when the parent index is negative. OpenCV uses -1 to mean
        "no parent"; index 0 is a valid contour and therefore counts as a
        parent.
    """
    return hierarchy[-1] < 0


def xywh_to_vec_rect(rect):
    """Convert an ``(x, y, w, h)`` box into a corner-based ``Rectangle``.

    Args:
        rect: Iterable of four values: left, top, width, height.

    Returns:
        ``Rectangle(xmin, ymin, xmax, ymax)`` with ``xmax = x + w`` and
        ``ymax = y + h``.
    """
    left, top, width, height = rect
    return Rectangle(xmin=left, ymin=top, xmax=left + width, ymax=top + height)


def vec_rect_to_xywh(rect):
    """Convert a corner-based ``(xmin, ymin, xmax, ymax)`` box to ``(x, y, w, h)``.

    Args:
        rect: Iterable of four values: xmin, ymin, xmax, ymax.

    Returns:
        Tuple ``(x, y, w, h)`` where ``w = xmax - xmin`` and ``h = ymax - ymin``.
    """
    left, top, right, bottom = rect
    return left, top, right - left, bottom - top


def remove_overlapping(rectangles):
    """Drop every rectangle that overlaps at least one other rectangle.

    All members of an overlapping group are discarded, not just one of them.
    Rectangles that compare equal to the candidate are skipped in the
    comparison, so exact duplicates do not eliminate each other.

    Args:
        rectangles: Iterable of ``(x, y, w, h)`` boxes.

    Returns:
        A lazy iterator over the surviving boxes, again as ``(x, y, w, h)``.
    """
    candidates = [xywh_to_vec_rect(box) for box in rectangles]

    def is_isolated(candidate):
        # A positive intersection area with any *different* rectangle
        # disqualifies the candidate.
        for other in candidates:
            if candidate == other:
                continue
            if compute_intersection(candidate, other) > 0:
                return False
        return True

    return map(vec_rect_to_xywh, filter(is_isolated, candidates))


def remove_included(rectangles):
    """Drop every rectangle that lies completely inside another rectangle.

    Containment is inclusive (shared edges count as inside). Rectangles that
    compare equal to the candidate are skipped in the comparison, so exact
    duplicates do not eliminate each other.

    Args:
        rectangles: Iterable of ``(x, y, w, h)`` boxes.

    Returns:
        A lazy iterator over the surviving boxes, again as ``(x, y, w, h)``.
    """
    boxes = [xywh_to_vec_rect(box) for box in rectangles]

    def encloses(outer, inner):
        return (
            inner.xmin >= outer.xmin
            and inner.ymin >= outer.ymin
            and inner.xmax <= outer.xmax
            and inner.ymax <= outer.ymax
        )

    def is_outermost(box):
        # Keep the box only if no *different* box fully encloses it.
        return all(box == other or not encloses(other, box) for other in boxes)

    return map(vec_rect_to_xywh, filter(is_outermost, boxes))


def find_segments(image):
    """Find bounding boxes of sufficiently large top-level contours.

    Args:
        image: Image handed to ``cv2.findContours`` (external contours only,
            simple chain approximation).

    Returns:
        A lazy iterator of ``(x, y, w, h)`` bounding rectangles for every
        contour that passes both ``is_likely_segment`` and ``has_no_parent``.
        The iterator is empty when no contours are found.
    """
    # Taking the last two items keeps this working with OpenCV 3.x, which
    # returns (image, contours, hierarchy), as well as 4.x, which returns
    # (contours, hierarchy).
    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    # OpenCV reports the hierarchy as None when it finds no contours at all
    # (e.g. a blank page); indexing it would raise a TypeError.
    if hierarchies is None:
        return iter(())

    return (
        cv2.boundingRect(contour)
        for contour, hierarchy in zip(contours, hierarchies[0])
        if is_likely_segment(contour) and has_no_parent(hierarchy)
    )


def parse_layout(image: np.ndarray):
    """Detect layout segments in a page image and return their bounding boxes.

    Detection runs in two passes. The first pass blurs a grayscale version of
    the page, binarises it with an inverted Otsu threshold, dilates the result
    and collects bounding boxes with ``find_segments``. The second ("meta")
    pass paints those boxes onto a copy of the page as filled black rectangles
    with a white outline, binarises and inverts that picture, and runs
    ``find_segments`` on it again. The boxes from the second pass are then
    filtered with ``remove_included`` and ``remove_overlapping``.

    Args:
        image: Colour page image; it is converted with
            ``cv2.COLOR_BGR2GRAY``. The caller's array is left untouched
            because all drawing happens on a copy.

    Returns:
        A lazy iterable of ``(x, y, w, h)`` boxes.
    """

    # Work on a copy: the rectangles below are drawn in place.
    image = image.copy()

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (7, 7), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Dilation merges nearby foreground pixels into connected blobs.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    rects = list(find_segments(dilate))

    # -> Run meta detection on the previous detections TODO: refactor
    for rect in rects:
        x, y, w, h = rect
        # Fill the box in black, then trace a thick white outline over it.
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)

    # Keep only values above 254 as white, then invert so that everything
    # that was not pure white becomes foreground.
    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
    image = ~image

    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rects = find_segments(image)
    # <- End of meta detection

    rects = remove_included(rects)
    rects = remove_overlapping(rects)

    return rects


def annotate_layout_in_pdf(pdf_path, page_index=1):
    """Render one PDF page, draw the detected layout boxes on it and display it.

    Args:
        pdf_path: Path of the PDF file to render.
        page_index: Index of the page to process; ``page_index + 1`` is passed
            to pdf2image as both the first and the last page.
    """
    page_number = page_index + 1
    rendered_pages = pdf2image.convert_from_path(pdf_path, first_page=page_number, last_page=page_number)
    page_image = np.array(rendered_pages[0])

    layout_boxes = parse_layout(page_image)
    annotated = draw_rectangles(page_image, layout_boxes)

    show_mpl(annotated)