2022-09-20 17:25:03 +02:00

94 lines
2.8 KiB
Python

from itertools import starmap
from typing import Iterable
import cv2
import numpy as np
from cv_analysis.figure_detection.text import remove_primary_text_regions, apply_threshold_to_image
from cv_analysis.table_parsing import preprocess, isolate_vertical_and_horizontal_components, \
turn_connected_components_into_rects
from cv_analysis.utils.display import show_image_mpl
def area_is_bigger_than(rect: tuple, maxarea=100000):
    """Tell whether a rectangle's area reaches the given threshold.

    Args:
        rect: A 4-item ``(x, y, w, h)`` sequence; only ``w`` and ``h`` are used.
        maxarea: Area threshold. Despite its name it acts as a lower bound:
            the comparison is inclusive (``area >= maxarea``).

    Returns:
        ``True`` when ``w * h`` is greater than or equal to ``maxarea``.
    """
    _, _, width, height = rect
    area = width * height
    return area >= maxarea
def _label_by_position(top, height, header_label, footer_label, default_label):
    """Pick a label from a region's vertical position on the page.

    The header check uses the region's bottom edge (``top + height``) and the
    footer check uses its top edge; the header check wins if both match.
    """
    if is_header(top + height):
        return header_label
    if is_footer(top):
        return footer_label
    return default_label


def define_rect(rect_img, original_position):
    """Assign a coarse layout label to one cropped page region.

    The crop is passed through ``preprocess`` and
    ``isolate_vertical_and_horizontal_components``; the external contours of
    that result decide the label:

    * no contours -> ``"header text"``, ``"footer text"`` or ``"text"``,
      depending on the region's vertical position;
    * more than one connected-component rect but exactly one external
      contour -> ``"table"`` if that contour's bounding box covers at least
      75 % of the crop area, otherwise ``"other"``;
    * anything else -> ``"header component"``, ``"footer component"`` or
      ``"other"``, again depending on the vertical position.

    Args:
        rect_img: Image crop of the region; its first two ``shape`` entries
            are used as the crop's dimensions. NOTE(review): presumably a
            grayscale or multi-channel numpy array -- confirm what
            ``preprocess`` accepts.
        original_position: ``(x, y, w, h)`` of the crop on the page. Only
            ``y`` and ``h`` are used.

    Returns:
        One of the label strings listed above.
    """
    _, yo, _, ho = original_position

    rect_img_inv = preprocess(rect_img)
    grid_inv = isolate_vertical_and_horizontal_components(rect_img_inv)
    cnts, _ = cv2.findContours(image=grid_inv, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)

    if not cnts:
        # No line structure found at all: treat the region as plain text.
        return _label_by_position(yo, ho, "header text", "footer text", "text")

    # Only the number of connected-component rects matters here, so they are
    # counted without converting them to another representation.
    cell_count = sum(1 for _ in turn_connected_components_into_rects(grid_inv))

    if cell_count > 1 and len(cnts) == 1:
        # Several cells enclosed by a single outer contour: a table candidate.
        _, _, w, h = cv2.boundingRect(cnts[0])
        # ``shape`` is (rows, columns[, channels]); slicing keeps this working
        # for multi-channel crops, where a plain 2-tuple unpack would raise.
        h_img, w_img = rect_img.shape[:2]
        if w * h / (w_img * h_img) >= 0.75:
            return "table"
        # The grid is too small relative to the layout rectangle to be a
        # table on its own; it may be part of a figure.
        return "other"

    # A single cell or several unrelated components: possibly a figure,
    # unless the region sits in the header or footer band.
    return _label_by_position(yo, ho, "header component", "footer component", "other")
def is_header(y, threshold=200):
    """Tell whether a vertical coordinate lies in the page's header band.

    Args:
        y: Vertical coordinate to test. NOTE(review): presumably in pixels
            measured from the top of the page -- confirm against callers.
        threshold: Exclusive upper bound of the header band. Defaults to the
            previously hard-coded value of 200, so existing callers are
            unaffected; pass another value for pages of a different size.

    Returns:
        ``True`` when ``y`` is strictly less than ``threshold``.
    """
    return y < threshold
def is_footer(y, threshold=2100):
    """Tell whether a vertical coordinate lies in the page's footer band.

    Args:
        y: Vertical coordinate to test. NOTE(review): presumably in pixels
            measured from the top of the page -- confirm against callers.
        threshold: Exclusive lower bound of the footer band. Defaults to the
            previously hard-coded value of 2100, so existing callers are
            unaffected; pass another value for pages of a different size.

    Returns:
        ``True`` when ``y`` is strictly greater than ``threshold``.
    """
    return y > threshold
def is_text(img):
    """Tell whether an image is considered text.

    The image is passed through ``remove_primary_text_regions`` and counts as
    text when the share of non-zero pixels left in the result is below 5 %.

    Note:
        Both the input and the cleaned image are displayed through
        ``show_image_mpl`` as a side effect of every call.

    Args:
        img: Image to test. NOTE(review): expected format is whatever
            ``remove_primary_text_regions`` accepts -- confirm there.

    Returns:
        ``True`` when the remaining pixel density is below 0.05.
    """
    show_image_mpl(img)
    without_text = remove_primary_text_regions(img)
    show_image_mpl(without_text)
    remaining_density = pixel_density(without_text)
    return remaining_density < 0.05
def pixel_density(img):
    """Return the fraction of non-zero elements in an image.

    Args:
        img: Array (or array-like) of pixel values; any shape is accepted.

    Returns:
        ``count_nonzero(img) / img.size`` as a float in ``[0, 1]``. An empty
        image has no pixels set, so its density is reported as ``0.0``
        instead of raising on the division by zero.
    """
    img = np.asarray(img)
    if img.size == 0:
        return 0.0
    return np.count_nonzero(img) / img.size
def label_rects(image: np.ndarray, rects: Iterable[tuple], labeler=None):
    """Lazily label every rectangle of an image.

    Args:
        image: Array indexed as ``image[row, column]``; each rectangle is
            cropped with ``image[y:y + h, x:x + w]``.
        rects: ``(x, y, w, h)`` tuples. The iterable is consumed exactly once,
            so a one-shot iterator or generator is labelled correctly.
        labeler: Callable invoked as ``labeler(crop, rect)`` that returns the
            label for one rectangle. Defaults to ``define_rect``.

    Returns:
        A lazy iterator yielding one label per rectangle, in input order.
        Nothing is cropped or labelled until the iterator is consumed.
    """
    if labeler is None:
        labeler = define_rect

    def crop(rect):
        x, y, w, h = rect
        return image[y:y + h, x:x + w]

    # Each rectangle is pulled from ``rects`` once and used for both the crop
    # and the labeler call. Iterating ``rects`` separately for the crops and
    # for the positions would pair crops with the wrong rectangles whenever
    # ``rects`` is an iterator rather than a re-iterable collection.
    return (labeler(crop(rect), rect) for rect in rects)