partial clean up
This commit is contained in:
parent
5a7b756fc1
commit
ba33417166
@ -84,6 +84,6 @@ def parse_layout(image: np.array):
|
||||
|
||||
rects = map(lambda r: r.xywh(), rects)
|
||||
rects = connect_related_rects(rects)
|
||||
# rects = list(map(Rectangle.from_xywh, rects))
|
||||
rects = list(map(Rectangle.from_xywh, rects))
|
||||
# rects = remove_included(rects)
|
||||
return rects
|
||||
|
||||
@ -6,7 +6,6 @@ import cv2
|
||||
import numpy as np
|
||||
from funcy import lmap
|
||||
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
from cv_analysis.utils.postprocessing import remove_isolated # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
from cv_analysis.utils.visual_logging import vizlogger
|
||||
@ -86,32 +85,36 @@ def isolate_vertical_and_horizontal_components(img_bin):
|
||||
return img_bin_final
|
||||
|
||||
|
||||
def find_table_layout_boxes(image: np.ndarray):
    """Convert the layout boxes of *image* that are large enough into rectangles.

    The image is handed to ``parse_layout``; every box it returns is unpacked
    as ``(x, y, w, h)`` and, when its area ``w * h`` reaches the minimum,
    turned into a ``Rectangle`` via ``Rectangle.from_xywh``.

    Args:
        image: Image passed unchanged to ``parse_layout``.

    Returns:
        A list with one entry per layout box, in the order ``parse_layout``
        produced them: a ``Rectangle`` for boxes whose area is at least
        100000, ``None`` for every smaller box.
    """
    min_area = 100000

    def rect_if_large_enough(box):
        # Only width and height matter for the size check.
        _, _, w, h = box
        if w * h >= min_area:
            return Rectangle.from_xywh(box)
        return None

    # NOTE(review): boxes below the threshold stay in the result as None
    # instead of being dropped -- confirm that callers expect the
    # placeholders before switching this to a filter.
    return lmap(rect_if_large_enough, parse_layout(image))
|
||||
|
||||
|
||||
def preprocess(image: np.ndarray, threshold: int = 195):
    """Binarise *image* and return the bitwise inverse of the result.

    Args:
        image: Input image. Arrays with more than two dimensions are
            converted with ``cv2.COLOR_BGR2GRAY`` first; arrays with two or
            fewer dimensions are used as they are.
        threshold: Cut-off handed to ``cv2.threshold`` (``THRESH_BINARY``,
            maximum value 255). Defaults to 195, the value that used to be
            hard-coded.

    Returns:
        ``~binary`` where ``binary`` is the thresholded image, i.e. pixels
        that were above *threshold* are cleared and all others are set
        (0 / 255 for 8-bit input -- assumes uint8 data, TODO confirm).
    """
    if image.ndim > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (used_threshold, image); only the image matters.
    _, binary = cv2.threshold(image, threshold, 255, cv2.THRESH_BINARY)
    return ~binary
|
||||
|
||||
|
||||
# def turn_connected_components_into_rects(image: np.array):
|
||||
# def is_large_enough(stat):
|
||||
# x1, y1, w, h, area = stat
|
||||
# return area > 2000 and w > 35 and h > 25
|
||||
#
|
||||
# _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
|
||||
#
|
||||
# stats = np.vstack(list(filter(is_large_enough, stats)))
|
||||
# rects = list(map(Rectangle.from_xywh, stats[:, :-1][2:]))
|
||||
# return remove_isolated(rects)
|
||||
|
||||
|
||||
def turn_connected_components_into_rects(image: np.ndarray):
    """Return the bounding boxes of the large connected components of ``~image``.

    Connected components are computed (8-connectivity) on the bitwise
    inverse of *image*. Each stats row is unpacked as
    ``(x, y, w, h, area)``; only rows with ``area > 2000``, ``w > 35`` and
    ``h > 25`` are kept.

    Args:
        image: Image whose inverse is analysed.

    Returns:
        A 2-D array with one ``(x, y, w, h)`` row per kept component: the
        area column is dropped and the first two kept rows are skipped.
        When no component passes the size filter an empty array with the
        same column count and dtype is returned instead of letting
        ``np.vstack`` raise ``ValueError`` on an empty list.
    """

    def is_large_enough(stat):
        x1, y1, w, h, area = stat
        return area > 2000 and w > 35 and h > 25

    _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)

    kept = [stat for stat in stats if is_large_enough(stat)]
    if not kept:
        # Same layout as the regular result, just without rows.
        return np.empty((0, stats.shape[1] - 1), dtype=stats.dtype)

    # NOTE(review): the first two surviving rows are always discarded --
    # presumably the background / page frame; confirm before changing.
    return np.vstack(kept)[:, :-1][2:]
|
||||
|
||||
|
||||
def parse_tables(image: np.array, show=False):
|
||||
@ -128,9 +131,9 @@ def parse_tables(image: np.array, show=False):
|
||||
image = isolate_vertical_and_horizontal_components(image)
|
||||
rects = turn_connected_components_into_rects(image)
|
||||
#print(rects, "\n\n")
|
||||
rects = list(map(Rectangle.from_xywh, rects))
|
||||
#rects = list(map(Rectangle.from_xywh, rects))
|
||||
#print(rects, "\n\n")
|
||||
rects = remove_isolated(rects)
|
||||
#rects = remove_isolated(rects)
|
||||
#print(rects, "\n\n")
|
||||
|
||||
return rects
|
||||
|
||||
@ -65,16 +65,6 @@ def rects_not_the_same(r):
|
||||
return r[0] != r[1]
|
||||
|
||||
|
||||
def find_related_rects(rects):
    """Split *rects* into related pairs and rectangles that belong to no pair.

    Every 2-combination of *rects* is tested with ``is_related`` and then
    with ``rects_not_the_same``; pairs passing both are kept.

    Args:
        rects: Rectangles to examine. The collection is iterated more than
            once, so it must be re-iterable (e.g. a list), and its elements
            must be hashable.

    Returns:
        A tuple ``(rect_pairs, unrel_rects)``: the list of related pairs and
        the list of rectangles that appear in none of them, in their
        original order. When no pair is related, ``([], rects)`` is returned
        with *rects* itself as the second element.
    """
    rect_pairs = [
        pair
        for pair in combinations(rects, 2)
        if is_related(pair) and rects_not_the_same(pair)
    ]
    if not rect_pairs:
        return [], rects

    # A set keeps the membership test below O(1) per rectangle.
    related = {rect for pair in rect_pairs for rect in pair}
    unrel_rects = [rect for rect in rects if rect not in related]
    return rect_pairs, unrel_rects
|
||||
|
||||
|
||||
|
||||
def connect_related_rects(rects: Iterable[tuple]):
|
||||
rects = list(rects)
|
||||
|
||||
@ -32,37 +32,33 @@ def define_rect(rect_img, original_position):
|
||||
w_img, h_img = rect_img.shape
|
||||
if w * h / (w_img * h_img) >= 0.75:
|
||||
# print("is table")
|
||||
return (255, 255, 0)
|
||||
return "table"
|
||||
else:
|
||||
# show_image_mpl(rect_img)
|
||||
# print(" table detected but to small for layout rect, so cant be table, maybe figure?")
|
||||
return (0, 255, 255)
|
||||
return "other"
|
||||
|
||||
else:
|
||||
if is_header(yo + ho):
|
||||
# print("is header component")
|
||||
return (255, 0, 0)
|
||||
return "header component"
|
||||
elif is_footer(yo):
|
||||
# print("is footer component")
|
||||
return (0, 255, 0)
|
||||
return "footer component"
|
||||
else:
|
||||
# print("img-inv",pixel_density(rect_img_inv))
|
||||
# show_image_mpl(rect_img)
|
||||
# print("grid_in", pixel_density(grid_inv))
|
||||
# show_image_mpl(grid_inv)
|
||||
# print("single cell or no connected components, maybe figure?")
|
||||
return (0, 255, 255)
|
||||
return "other"
|
||||
|
||||
else:
|
||||
if is_header(yo + ho):
|
||||
# print("is header text")
|
||||
return (255, 0, 0)
|
||||
return "header text"
|
||||
elif is_footer(yo):
|
||||
# print("is footer text")
|
||||
return (0, 255, 0)
|
||||
return "footer text"
|
||||
else:
|
||||
# print("is text")
|
||||
return (0, 0, 255)
|
||||
return "text"
|
||||
|
||||
|
||||
def is_header(y):
|
||||
@ -86,24 +82,6 @@ def pixel_density(img):
|
||||
return density
|
||||
|
||||
|
||||
def annotate_rect(image, rects, rect_labels):
    """Draw the outline of every rectangle onto *image*.

    Args:
        image: Image handed to ``cv2.rectangle`` for each rectangle.
        rects: Iterable of ``(x, y, w, h)`` rectangles.
        rect_labels: Iterable paired element-wise with *rects*; each entry
            is passed to ``cv2.rectangle`` as the colour argument. Pairing
            stops at the shorter of the two iterables.

    Returns:
        The same *image* object that was passed in.
    """
    for (x, y, w, h), colour in zip(rects, rect_labels):
        # Outline from the top-left corner to the bottom-right corner,
        # line thickness 2.
        cv2.rectangle(image, (x, y), (x + w, y + h), colour, 2)

    return image
|
||||
|
||||
|
||||
def label_rects(image: np.array, rects: Iterable[tuple]):
|
||||
def crop_image_rects(rect):
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
from itertools import starmap
|
||||
from typing import Iterable
|
||||
|
||||
import cv2
|
||||
@ -16,11 +17,11 @@ def area_is_bigger_than(rect: tuple, maxarea=100000):
|
||||
|
||||
def define_rect(rect_img, original_position):
|
||||
show_image_mpl(rect_img)
|
||||
xo,yo,wo,ho = original_position
|
||||
if is_header(yo+ho):
|
||||
x,y,w,h = original_position
|
||||
if is_header(y+h):
|
||||
print(original_position, " is header")
|
||||
return "header"
|
||||
elif is_footer(yo):
|
||||
elif is_footer(y):
|
||||
print(original_position, " is footer")
|
||||
return "footer"
|
||||
elif is_table(rect_img):
|
||||
@ -29,6 +30,8 @@ def define_rect(rect_img, original_position):
|
||||
elif is_text(rect_img):
|
||||
print(original_position, " is text")
|
||||
return "text"
|
||||
else:
|
||||
return "other"
|
||||
|
||||
|
||||
def is_table(rect_img):
|
||||
@ -43,7 +46,7 @@ def is_table(rect_img):
|
||||
x, y, w, h = bbox[0]
|
||||
w_img, h_img = rect_img.shape
|
||||
if w * h / (w_img * h_img) >= 0.75:
|
||||
print("is table")
|
||||
#print("is table")
|
||||
return True
|
||||
else:
|
||||
print(" table detected but to small for layout rect, so cant be table, maybe figure?")
|
||||
@ -83,7 +86,11 @@ def annotate_rect(rect, rect_img):
|
||||
|
||||
|
||||
def label_rects(rects: Iterable[tuple], image: np.array):
|
||||
labeled_rects = {}
|
||||
for rect in rects:
|
||||
def crop_image_rects(rect):
|
||||
x, y, w, h = rect
|
||||
labeled_rects[rect] = define_rect(image[y:y + h, x:x + w], rect)
|
||||
return image[y:y + h, x:x + w]
|
||||
|
||||
rect_images = map(crop_image_rects, rects)
|
||||
rect_labels = starmap(define_rect, zip(rect_images, rects))
|
||||
print(rect_labels)
|
||||
return rect_labels
|
||||
@ -13,6 +13,8 @@ from cv_analysis.table_parsing import parse_tables
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from pdf2img.conversion import convert_pages_to_images
|
||||
|
||||
from cv_analysis.utils.sort_rects import label_rects
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user