261 lines
7.7 KiB
Python
261 lines
7.7 KiB
Python
import json
|
|
from os.path import join
|
|
|
|
import cv2
|
|
import pytest
|
|
from dvc.repo import Repo
|
|
from funcy import first
|
|
from loguru import logger
|
|
|
|
from cv_analysis.config import get_config
|
|
from cv_analysis.locations import REPO_ROOT_PATH, TEST_DATA_DVC
|
|
from cv_analysis.utils.drawing import draw_rectangles
|
|
from cv_analysis.utils.input import open_analysis_input_file
|
|
from test.fixtures.figure_detection import paste_text
|
|
|
|
# Project configuration, loaded once when this module is imported.
# The file-based fixtures below read `test_data_dir` from it.
CV_CONFIG = get_config()
|
|
|
|
|
|
@pytest.fixture
def client_page_with_table(test_file_index, dvc_test_data):
    """Return the first item produced by opening ``test<index>.png``.

    The image is looked up in ``CV_CONFIG.test_data_dir``. ``dvc_test_data``
    is requested only for its side effect (pulling the test data with DVC
    before the file is read); its value is not used.
    """
    file_name = f"test{test_file_index}.png"
    image_path = join(CV_CONFIG.test_data_dir, file_name)
    loaded_pages = open_analysis_input_file(image_path)
    return first(loaded_pages)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def dvc_test_data():
    """Pull the DVC-tracked test data once per test session.

    Runs ``pull`` on the repository rooted at ``REPO_ROOT_PATH``, restricted
    to the ``TEST_DATA_DVC`` target, and logs before and after. Yields no
    value; fixtures depend on it purely for the side effect.
    """
    # noinspection PyCallingNonCallable
    logger.info("Pulling data with DVC...")
    repo = Repo(REPO_ROOT_PATH)
    pull_targets = [str(TEST_DATA_DVC)]
    repo.pull(targets=pull_targets)
    logger.info("Finished pulling data.")
|
|
|
|
|
|
@pytest.fixture
def expected_table_annotation(test_file_index):
    """Load the expected annotation stored in ``test<index>.json``.

    The file is looked up in ``CV_CONFIG.test_data_dir`` and returned as
    whatever ``json.load`` produces for its content.
    """
    json_path = join(CV_CONFIG.test_data_dir, f"test{test_file_index}.json")
    # JSON is UTF-8 by specification; being explicit keeps the result
    # independent of the platform's locale encoding (e.g. cp1252 on Windows).
    with open(json_path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
@pytest.fixture
def page_with_table(background, table_shape, table_style, n_tables, line_thickness, line_type):
    """Draw one or two synthetic tables onto ``background`` and return the page.

    A table is always drawn at position (100, 100). When ``n_tables`` equals 2,
    a second table with the same shape and style is drawn at (200, 2000).
    """
    anchors = [(100, 100)]
    if n_tables == 2:
        anchors.append((200, 2000))

    canvas = background
    for anchor in anchors:
        canvas = draw_table(canvas, anchor, table_shape, table_style, line_thickness, line_type)
    return canvas
|
|
|
|
|
|
@pytest.fixture
def page_with_patchy_table(page_with_table, background_color):
    """Overdraw the table page with a coarse grid of background-coloured lines.

    Vertical lines (thickness 2) are drawn every 325 px and horizontal lines
    (thickness 1) every 515 px, each spanning the full hard-coded page extent.
    Presumably this punches gaps into the table's lines to make it "patchy";
    that only holds if ``background_color`` matches the page background.
    """
    # Hard-coded page extent; NOTE(review): looks like A4 at 300 dpi - confirm.
    width, height = 2480, 3508
    # Same grey value replicated for all three channels.
    erase_color = (background_color,) * 3

    canvas = page_with_table
    for column_x in range(0, width, 325):
        canvas = cv2.line(canvas, (column_x, 0), (column_x, height), erase_color, 2, cv2.LINE_AA)
    for row_y in range(0, height, 515):
        canvas = cv2.line(canvas, (0, row_y), (width, row_y), erase_color, 1, cv2.LINE_AA)
    return canvas
|
|
|
|
|
|
@pytest.fixture
def page_with_table_and_text(page_with_table):
    """Return the table page after ``paste_text`` has been applied to it.

    The positional arguments are passed through unchanged; their meaning is
    defined by ``paste_text`` in ``test.fixtures.figure_detection``.
    """
    page_with_text = paste_text(page_with_table, (50, 1500), 1, cv2.FONT_HERSHEY_COMPLEX, 1700)
    return page_with_text
|
|
|
|
|
|
@pytest.fixture
def expected_gold_page_with_table(page_with_table, n_tables):
    """Return the expected cell rectangles for the synthetic table page.

    Each rectangle is an ``(x, y, width, height)`` tuple. Cells are listed
    table by table, row by row from top to bottom, and left to right within a
    row. The first table's 40 cells are always present; the second table's 40
    cells are appended only when ``n_tables`` equals 2.
    """
    # Every cell in a column shares (x, width) and every cell in a row shares
    # (y, height), so each table is described by its column and row specs.
    first_table = (
        # (x, width) per column
        [(103, 185), (291, 185), (479, 185), (667, 185), (855, 185), (1043, 185), (1231, 185), (1419, 181)],
        # (y, height) per row
        [(103, 198), (304, 198), (505, 198), (706, 198), (907, 193)],
    )
    second_table = (
        [(203, 186), (390, 187), (578, 187), (766, 187), (954, 187), (1142, 187), (1330, 187), (1518, 182)],
        [(2003, 199), (2203, 200), (2404, 200), (2605, 200), (2806, 194)],
    )

    tables = [first_table]
    if n_tables == 2:
        tables.append(second_table)

    # Row-major expansion: x varies fastest, then y, then the table.
    return [
        (x, y, width, height)
        for columns, rows in tables
        for y, height in rows
        for x, width in columns
    ]
|
|
|
|
|
|
def draw_table(page, table_position, table_shape, table_style, line_thickness, line_type):
    """Draw a 1500 x 1000 table at ``table_position`` and return the page.

    The inner grid lines are drawn by ``draw_grid_lines``, which receives
    ``table_style`` as its set of visible lines. If ``table_style`` contains
    ``"closed"``, the table's bounding box is additionally drawn in black via
    ``draw_rectangles``.
    """
    # Bounding box is the position followed by the fixed table size.
    table_bbox = tuple(table_position) + (1500, 1000)

    page = draw_grid_lines(
        page,
        table_shape,
        table_bbox,
        table_style,
        thickness=line_thickness,
        line_type=line_type,
    )

    if "closed" not in table_style:
        return page
    return draw_rectangles(page, [table_bbox], (0, 0, 0))
|
|
|
|
|
|
def draw_grid_lines(image, table_shape, bbox, visible_lines, thickness, line_type):
    """Draw the inner grid lines of a table in black and return the image.

    ``bbox`` is unpacked as ``(x, y, width, height)`` and ``table_shape`` as
    ``(n_rows, n_columns)``. Horizontal lines are drawn only if
    ``"horizontal"`` is in ``visible_lines``, vertical lines only if
    ``"vertical"`` is. The first line of each kind sits one cell in from the
    top/left edge and lines stop before the bottom/right edge, so the outer
    border is never drawn here.
    """
    left, top, width, height = bbox
    row_count, column_count = table_shape
    right, bottom = left + width, top + height

    # Spacing between neighbouring lines: floor-divided cell size plus one pixel.
    column_step = width // column_count + 1
    row_step = height // row_count + 1

    # Collect (start, end) points first; horizontal lines are drawn before vertical ones.
    segments = []
    if "horizontal" in visible_lines:
        segments += [((left, row_y), (right, row_y)) for row_y in range(top + row_step, bottom, row_step)]
    if "vertical" in visible_lines:
        segments += [((column_x, top), (column_x, bottom)) for column_x in range(left + column_step, right, column_step)]

    for start, end in segments:
        image = cv2.line(
            image,
            start,
            end,
            color=(0, 0, 0),
            thickness=thickness,
            lineType=line_type,
        )
    return image
|