This commit also disables a broken test that cannot be fixed. Many scripts that did not work anyway (and that, in my view, are not needed) were not updated. The scripts that are needed to run the service processing locally still work.
261 lines
7.7 KiB
Python
261 lines
7.7 KiB
Python
import json
|
|
from os.path import join
|
|
|
|
import cv2
|
|
import pytest
|
|
from dvc.repo import Repo
|
|
from funcy import first
|
|
from kn_utils.logging import logger
|
|
|
|
from cv_analysis.config import get_config
|
|
from cv_analysis.locations import REPO_ROOT_PATH, TEST_DATA_DVC
|
|
from cv_analysis.utils.draw import draw_rectangles
|
|
from cv_analysis.utils.open_pdf import open_pdf
|
|
from test.fixtures.figure_detection import paste_text
|
|
|
|
# Project configuration, loaded once at import time. The fixtures in this module
# read ``settings.paths.test_data_dir`` from it to locate test files.
settings = get_config()
|
|
|
|
|
|
@pytest.fixture
def client_page_with_table(test_file_index, dvc_test_data):
    """Return the first page of the test file selected by ``test_file_index``.

    The file ``test<index>.png`` is looked up in ``settings.paths.test_data_dir``
    and opened with ``open_pdf``; only the first item it yields is returned.

    ``dvc_test_data`` is not used directly. Requesting it makes pytest run that
    fixture (which pulls the test data) before this one.
    """
    file_name = f"test{test_file_index}.png"
    pages = open_pdf(join(settings.paths.test_data_dir, file_name))
    return first(pages)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def dvc_test_data():
    """Pull the DVC target ``TEST_DATA_DVC`` once per test session.

    The pull runs against the repository at ``REPO_ROOT_PATH`` and is logged
    before and after. The fixture yields no value; other fixtures request it
    only for its side effect.
    """
    logger.info("Pulling data with DVC...")
    pull_targets = [str(TEST_DATA_DVC)]
    dvc_repo = Repo(REPO_ROOT_PATH)  # noinspection PyCallingNonCallable
    dvc_repo.pull(targets=pull_targets)
    logger.info("Finished pulling data.")
|
|
|
|
|
|
@pytest.fixture
def expected_table_annotation(test_file_index):
    """Load the expected annotation for the test file selected by ``test_file_index``.

    Reads ``test<index>.json`` from ``settings.paths.test_data_dir`` and returns
    the parsed JSON content.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # NOTE(review): unlike ``client_page_with_table`` this fixture does not request
    # ``dvc_test_data``. If the JSON files are part of the DVC-tracked data, it
    # relies on another fixture having pulled them first - confirm.
    json_path = join(settings.paths.test_data_dir, f"test{test_file_index}.json")
    # Decode explicitly as UTF-8; otherwise open() falls back to the platform's
    # locale encoding and non-ASCII content may be decoded differently per machine.
    with open(json_path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
@pytest.fixture
def page_with_table(background, table_shape, table_style, n_tables, line_thickness, line_type):
    """Return ``background`` with one or two synthetic tables drawn onto it.

    The first table is always drawn at position ``(100, 100)``. A second table
    at ``(200, 2000)`` is added only when ``n_tables`` equals 2; every other
    value yields a single table. Both tables share shape, style, line thickness
    and line type.
    """
    single_table_page = draw_table(
        background,
        (100, 100),
        table_shape,
        table_style,
        line_thickness,
        line_type=line_type,
    )
    if n_tables != 2:
        return single_table_page
    return draw_table(single_table_page, (200, 2000), table_shape, table_style, line_thickness, line_type)
|
|
|
|
|
|
@pytest.fixture
def page_with_patchy_table(page_with_table, background_color):
    """Return ``page_with_table`` overdrawn with a coarse grid of background-coloured lines.

    Vertical lines of thickness 2 are drawn every 325 px and horizontal lines of
    thickness 1 every 515 px, all anti-aliased (``cv2.LINE_AA``), across a
    hard-coded 2480 x 3508 px extent.
    NOTE(review): the extent is presumably the page size, and the lines are
    presumably meant to punch gaps into the table lines - confirm.
    """
    extent_width, extent_height = 2480, 3508
    left_edge = top_edge = 0
    # Same value on all three channels.
    patch_colour = (background_color,) * 3

    canvas = page_with_table
    for column in range(0, extent_width, 325):
        canvas = cv2.line(canvas, (column, top_edge), (column, extent_height), patch_colour, 2, cv2.LINE_AA)
    for row in range(0, extent_height, 515):
        canvas = cv2.line(canvas, (left_edge, row), (extent_width, row), patch_colour, 1, cv2.LINE_AA)
    return canvas
|
|
|
|
|
|
@pytest.fixture
def page_with_table_and_text(page_with_table):
    """Return ``page_with_table`` after ``paste_text`` has been applied to it.

    The remaining ``paste_text`` arguments are fixed constants.
    NOTE(review): their meaning is defined by ``paste_text`` in
    ``test.fixtures.figure_detection`` - see there.
    """
    paste_args = ((50, 1500), 1, cv2.FONT_HERSHEY_COMPLEX, 1700)
    return paste_text(page_with_table, *paste_args)
|
|
|
|
|
|
@pytest.fixture
def expected_gold_page_with_table(page_with_table, n_tables):
    """Return the gold-standard cell rectangles for the page built by ``page_with_table``.

    The result is a list of 4-tuples in row-major order (all cells of the first
    row from left to right, then the next row, and so on). Each table
    contributes a grid of 8 columns by 5 rows, i.e. 40 entries.
    NOTE(review): the tuples are presumably ``(x, y, width, height)`` - confirm
    against the consumer of this fixture.

    The cells of the first table are always returned. The cells of the second
    table are appended only when ``n_tables`` equals 2.

    ``page_with_table`` is requested but its value is not used here.
    """

    def grid_cells(columns, rows):
        # columns: (x, width) per column; rows: (y, height) per row.
        return [(x, y, w, h) for y, h in rows for x, w in columns]

    # The last column and last row of each table are slightly smaller than the rest.
    first_table_columns = [
        (103, 185),
        (291, 185),
        (479, 185),
        (667, 185),
        (855, 185),
        (1043, 185),
        (1231, 185),
        (1419, 181),
    ]
    first_table_rows = [(103, 198), (304, 198), (505, 198), (706, 198), (907, 193)]
    result = grid_cells(first_table_columns, first_table_rows)

    if n_tables == 2:
        second_table_columns = [
            (203, 186),
            (390, 187),
            (578, 187),
            (766, 187),
            (954, 187),
            (1142, 187),
            (1330, 187),
            (1518, 182),
        ]
        second_table_rows = [(2003, 199), (2203, 200), (2404, 200), (2605, 200), (2806, 194)]
        result += grid_cells(second_table_columns, second_table_rows)

    return result
|
|
|
|
|
|
def draw_table(page, table_position, table_shape, table_style, line_thickness, line_type):
    """Draw one table onto ``page`` and return the resulting image.

    The table occupies a fixed 1500 x 1000 bounding box whose first two
    components come from ``table_position``. Grid lines are drawn by
    ``draw_grid_lines`` according to ``table_style``. When ``table_style``
    contains ``"closed"``, a black rectangle is additionally drawn around the
    bounding box via ``draw_rectangles``.
    """
    table_bbox = tuple(table_position) + (1500, 1000)
    with_grid = draw_grid_lines(
        page,
        table_shape,
        table_bbox,
        table_style,
        thickness=line_thickness,
        line_type=line_type,
    )
    if "closed" not in table_style:
        return with_grid
    return draw_rectangles(with_grid, [table_bbox], (0, 0, 0))
|
|
|
|
|
|
def draw_grid_lines(image, table_shape, bbox, visible_lines, thickness, line_type):
    """Draw the grid lines of a table onto ``image`` in black.

    Args:
        image: Image handed to ``cv2.line``.
        table_shape: ``(n_rows, n_columns)`` of the table.
        bbox: ``(x, y, w, h)`` bounding box of the table.
        visible_lines: String or container. Horizontal lines are drawn when it
            contains ``"horizontal"``, vertical lines when it contains
            ``"vertical"``.
        thickness: Forwarded to ``cv2.line`` as ``thickness``.
        line_type: Forwarded to ``cv2.line`` as ``lineType``.

    Returns:
        The image returned by the last ``cv2.line`` call, or ``image`` itself
        when no line was drawn.

    Raises:
        ZeroDivisionError: If ``n_rows`` or ``n_columns`` is 0.
    """
    x, y, w, h = bbox
    n_rows, n_columns = table_shape

    # Each cell is one pixel larger than the integer quotient, so n cells always
    # overshoot the bbox and the ranges below stop before its far edge.
    cell_width = w // n_columns + 1
    cell_height = h // n_rows + 1
    black = (0, 0, 0)

    # Both ranges start one cell in from the bbox origin, so no line is drawn on
    # the near edge either; the outer border is left to the caller.
    if "horizontal" in visible_lines:
        for line_y in range(y + cell_height, y + h, cell_height):
            image = cv2.line(
                image,
                (x, line_y),
                (x + w, line_y),
                color=black,
                thickness=thickness,
                lineType=line_type,
            )

    if "vertical" in visible_lines:
        for line_x in range(x + cell_width, x + w, cell_width):
            image = cv2.line(
                image,
                (line_x, y),
                (line_x, y + h),
                color=black,
                thickness=thickness,
                lineType=line_type,
            )
    return image
|