import json
from os.path import join

import cv2
import pytest
from dvc.repo import Repo
from funcy import first
from loguru import logger

from cv_analysis.config import get_config
from cv_analysis.locations import REPO_ROOT_PATH, TEST_DATA_DVC
from cv_analysis.utils.draw import draw_rectangles
from cv_analysis.utils.open_pdf import open_pdf
from test.fixtures.figure_detection import paste_text

CV_CONFIG = get_config()


@pytest.fixture
def client_page_with_table(test_file_index, dvc_test_data):
    """Return the first item produced by ``open_pdf`` for ``test<index>.png``.

    The file is looked up in ``CV_CONFIG.test_data_dir``. ``dvc_test_data`` is
    requested only so that fixture runs before the file is opened.
    """
    file_name = f"test{test_file_index}.png"
    pages = open_pdf(join(CV_CONFIG.test_data_dir, file_name))
    return first(pages)


@pytest.fixture(scope="session")
def dvc_test_data():
    """Pull the ``TEST_DATA_DVC`` target through DVC once per test session."""
    # noinspection PyCallingNonCallable
    logger.info("Pulling data with DVC...")
    repo = Repo(REPO_ROOT_PATH)
    repo.pull(targets=[str(TEST_DATA_DVC)])
    logger.info("Finished pulling data.")


@pytest.fixture
def expected_table_annotation(test_file_index):
    """Return the parsed content of ``test<index>.json`` from the test data dir."""
    annotation_path = join(CV_CONFIG.test_data_dir, f"test{test_file_index}.json")
    with open(annotation_path) as annotation_file:
        annotation = json.load(annotation_file)
    return annotation


@pytest.fixture
def page_with_table(background, table_shape, table_style, n_tables, line_thickness, line_type):
    """Draw one table at (100, 100) on ``background``; add a second at (200, 2000) if ``n_tables == 2``."""
    page = draw_table(
        background,
        (100, 100),
        table_shape,
        table_style,
        line_thickness,
        line_type=line_type,
    )
    if n_tables != 2:
        return page
    return draw_table(page, (200, 2000), table_shape, table_style, line_thickness, line_type)


@pytest.fixture
def page_with_patchy_table(page_with_table, background_color):
    """Return ``page_with_table`` overdrawn with a sparse grid of background-coloured lines.

    Vertical lines (thickness 2) are drawn every 325 px and horizontal lines
    (thickness 1) every 515 px across a 2480 x 3508 area.
    """
    page_width, page_height = 2480, 3508
    # Same colour value repeated for all three channels.
    erase_color = tuple(3 * [background_color])

    page = page_with_table
    for x in range(0, page_width, 325):
        page = cv2.line(page, (x, 0), (x, page_height), erase_color, 2, cv2.LINE_AA)
    for y in range(0, page_height, 515):
        page = cv2.line(page, (0, y), (page_width, y), erase_color, 1, cv2.LINE_AA)
    return page


@pytest.fixture
def page_with_table_and_text(page_with_table):
    """Return ``page_with_table`` after ``paste_text`` has been applied to it."""
    return paste_text(page_with_table, (50, 1500), 1, cv2.FONT_HERSHEY_COMPLEX, 1700)


def _grid_cells(columns, rows):
    """Expand ``(x, w)`` column specs and ``(y, h)`` row specs into row-major ``(x, y, w, h)`` tuples."""
    return [(x, y, w, h) for y, h in rows for x, w in columns]


@pytest.fixture
def expected_gold_page_with_table(page_with_table, n_tables):
    """Return the expected cell rectangles for the table(s) drawn by ``page_with_table``.

    Each entry is a 4-tuple, presumably ``(x, y, width, height)``. Cells are
    listed row by row; the second table's cells follow the first table's when
    ``n_tables == 2``.
    """
    # First table: 8 columns x 5 rows; the last column and last row are smaller.
    first_columns = [
        (103, 185),
        (291, 185),
        (479, 185),
        (667, 185),
        (855, 185),
        (1043, 185),
        (1231, 185),
        (1419, 181),
    ]
    first_rows = [(103, 198), (304, 198), (505, 198), (706, 198), (907, 193)]
    result = _grid_cells(first_columns, first_rows)

    if n_tables == 2:
        second_columns = [
            (203, 186),
            (390, 187),
            (578, 187),
            (766, 187),
            (954, 187),
            (1142, 187),
            (1330, 187),
            (1518, 182),
        ]
        second_rows = [(2003, 199), (2203, 200), (2404, 200), (2605, 200), (2806, 194)]
        result = result + _grid_cells(second_columns, second_rows)

    return result


def draw_table(page, table_position, table_shape, table_style, line_thickness, line_type):
    """Draw a 1500 x 1000 table whose top-left corner is ``table_position``.

    Grid lines are drawn according to ``table_style``; when ``"closed"`` is in
    ``table_style`` the table's bounding box is additionally passed to
    ``draw_rectangles`` with colour (0, 0, 0).
    """
    table_bbox = (*table_position, 1500, 1000)
    page = draw_grid_lines(
        page,
        table_shape,
        table_bbox,
        table_style,
        thickness=line_thickness,
        line_type=line_type,
    )
    if "closed" not in table_style:
        return page
    return draw_rectangles(page, [table_bbox], (0, 0, 0))


def draw_grid_lines(image, table_shape, bbox, visible_lines, thickness, line_type):
    """Draw the inner grid lines of a table onto ``image`` and return it.

    ``bbox`` is ``(x, y, w, h)`` and ``table_shape`` is ``(n_rows, n_columns)``.
    Horizontal and/or vertical separators are drawn in black depending on
    whether ``"horizontal"`` / ``"vertical"`` is contained in ``visible_lines``.
    Only the separators strictly inside the box are drawn, not its outline.
    """
    left, top, width, height = bbox
    n_rows, n_columns = table_shape
    right, bottom = left + width, top + height

    # Cell pitch: integer share of the box plus one pixel.
    column_step = width // n_columns + 1
    row_step = height // n_rows + 1
    black = (0, 0, 0)

    if "horizontal" in visible_lines:
        for row_y in range(top + row_step, bottom, row_step):
            image = cv2.line(
                image,
                (left, row_y),
                (right, row_y),
                color=black,
                thickness=thickness,
                lineType=line_type,
            )

    if "vertical" in visible_lines:
        for column_x in range(left + column_step, right, column_step):
            image = cv2.line(
                image,
                (column_x, top),
                (column_x, bottom),
                color=black,
                thickness=thickness,
                lineType=line_type,
            )

    return image