Merge in RR/cv-analysis from new_pyinfra to master
Squashed commit of the following:
commit f7a01a90aad1c402ac537de5bdf15df628ad54df
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jul 27 10:40:59 2022 +0200
fix typo
commit ff4d549fac5b612c2d391ae85823c5eca1e91916
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jul 27 10:34:04 2022 +0200
adjust build scripts for new pyinfra
commit ecd70f60d46406d8b6cc7f36a1533d706c917ca8
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jul 27 09:42:55 2022 +0200
simplify logging by using default configurations
commit 20193c14c940eed2b0a7a72058167e26064119d0
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jul 26 17:16:57 2022 +0200
tidy-up, refactor config logic to not depend on external files
commit d8069cd4d404a570bb04a04278161669d1c83332
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 15:14:59 2022 +0200
update pyinfra
commit c3bc11037cca9baf016043ab997c566f5b4a2586
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 15:09:14 2022 +0200
repair tests
commit 6f4e4f2863ee16ae056c1d432f663858c5f10221
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 14:52:38 2022 +0200
updated server logic to work with new pyinfra; update scripts for pyinfra as submodule
commit 2a18dba81de5ee84d0bdf0e77f478693e8d8aef4
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 14:10:41 2022 +0200
formatting
commit d87ce9328de9aa2341228af9b24473d5e583504e
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 14:10:11 2022 +0200
make server logic compatible with new pyinfra
250 lines
7.3 KiB
Python
250 lines
7.3 KiB
Python
import json
|
|
from os.path import join
|
|
|
|
import cv2
|
|
import pytest
|
|
from funcy import first
|
|
|
|
from cv_analysis.config import get_config
|
|
from cv_analysis.utils.draw import draw_rectangles
|
|
from cv_analysis.utils.open_pdf import open_pdf
|
|
from test.fixtures.figure_detection import paste_text
|
|
|
|
# Project configuration object; the file-based fixtures in this module read
# `test_data_dir` from it to locate their input files.
CV_CONFIG = get_config()
|
|
|
|
|
|
@pytest.fixture
def client_page_with_table(test_file_index):
    """Load the first page of the test image selected by ``test_file_index``.

    The file ``test<test_file_index>.png`` is looked up in
    ``CV_CONFIG.test_data_dir`` and handed to ``open_pdf``; only the first
    item of what ``open_pdf`` produces is returned.
    """
    image_file = f"test{test_file_index}.png"
    pages = open_pdf(join(CV_CONFIG.test_data_dir, image_file))
    return first(pages)
|
|
|
|
|
|
@pytest.fixture
def expected_table_annotation(test_file_index):
    """Return the parsed content of ``test<test_file_index>.json``.

    The file is read from ``CV_CONFIG.test_data_dir`` and decoded with
    ``json.load``.
    """
    annotation_path = join(CV_CONFIG.test_data_dir, f"test{test_file_index}.json")
    with open(annotation_path) as annotation_file:
        annotation = json.load(annotation_file)
    return annotation
|
|
|
|
|
|
@pytest.fixture
def page_with_table(background, table_shape, table_style, n_tables, line_thickness, line_type):
    """Draw one or two synthetic tables onto ``background`` and return the page.

    A table is always drawn at position ``(100, 100)``. A second one is added
    at ``(200, 2000)`` only when ``n_tables`` equals 2; any other value yields
    a single table.
    """
    positions = [(100, 100)]
    if n_tables == 2:
        positions.append((200, 2000))

    page = background
    for position in positions:
        page = draw_table(page, position, table_shape, table_style, line_thickness, line_type)
    return page
|
|
|
|
|
|
@pytest.fixture
def page_with_patchy_table(page_with_table, background_color):
    """Return the table page with background-coloured lines drawn over it.

    Vertical lines (thickness 2) are drawn every 325 px and horizontal lines
    (thickness 1) every 515 px, all in ``background_color``, so they overwrite
    whatever table strokes they cross.
    """
    # Hard-coded canvas extent; presumably matches the size of the
    # `background` fixture -- confirm against its definition.
    page_width, page_height = 2480, 3508
    # Same value on all three channels.
    patch_color = (background_color,) * 3

    canvas = page_with_table
    for x in range(0, page_width, 325):
        canvas = cv2.line(canvas, (x, 0), (x, page_height), patch_color, 2, cv2.LINE_AA)
    for y in range(0, page_height, 515):
        canvas = cv2.line(canvas, (0, y), (page_width, y), patch_color, 1, cv2.LINE_AA)
    return canvas
|
|
|
|
|
|
@pytest.fixture
def page_with_table_and_text(page_with_table):
    """Return the result of ``paste_text`` applied to the table page."""
    # Positional arguments forwarded unchanged to `paste_text`; their exact
    # meaning is defined by that helper (test.fixtures.figure_detection).
    text_args = ((50, 1500), 1, cv2.FONT_HERSHEY_COMPLEX, 1700)
    return paste_text(page_with_table, *text_args)
|
|
|
|
|
|
@pytest.fixture
def expected_gold_page_with_table(page_with_table, n_tables):
    """Return the gold list of cell boxes for the synthetic table page.

    The result is a list of 4-tuples, presumably ``(x, y, width, height)`` in
    the same layout as the bboxes used by ``draw_grid_lines`` -- confirm
    against the consuming test. The first table contributes 40 entries
    (5 rows of 8 cells); when ``n_tables`` equals 2 the 40 entries of the
    second table are appended after them.

    ``page_with_table`` is requested but not read here.
    """

    def cells(columns, rows):
        # Row-major order: all columns of the first row, then the next row, ...
        return [(x, y, w, h) for y, h in rows for x, w in columns]

    # (x, width) per column and (y, height) per row of the first table.
    first_columns = [
        (103, 185),
        (291, 185),
        (479, 185),
        (667, 185),
        (855, 185),
        (1043, 185),
        (1231, 185),
        (1419, 181),
    ]
    first_rows = [(103, 198), (304, 198), (505, 198), (706, 198), (907, 193)]
    expected = cells(first_columns, first_rows)

    if n_tables == 2:
        # (x, width) per column and (y, height) per row of the second table.
        second_columns = [
            (203, 186),
            (390, 187),
            (578, 187),
            (766, 187),
            (954, 187),
            (1142, 187),
            (1330, 187),
            (1518, 182),
        ]
        second_rows = [(2003, 199), (2203, 200), (2404, 200), (2605, 200), (2806, 194)]
        expected = expected + cells(second_columns, second_rows)

    return expected
|
|
|
|
|
|
def draw_table(page, table_position, table_shape, table_style, line_thickness, line_type):
    """Draw a 1500 x 1000 table onto ``page`` at ``table_position``.

    Grid lines are delegated to ``draw_grid_lines``, which receives
    ``table_style`` as its ``visible_lines`` argument. If ``table_style``
    contains ``"closed"``, the table bbox is additionally outlined via
    ``draw_rectangles`` with colour ``(0, 0, 0)``.

    Returns the page produced by the last drawing call.
    """
    # The bbox is the position followed by the fixed table width and height.
    table_bbox = tuple(table_position) + (1500, 1000)
    with_grid = draw_grid_lines(
        page,
        table_shape,
        table_bbox,
        table_style,
        thickness=line_thickness,
        line_type=line_type,
    )
    if "closed" not in table_style:
        return with_grid
    return draw_rectangles(with_grid, [table_bbox], (0, 0, 0))
|
|
|
|
|
|
def draw_grid_lines(image, table_shape, bbox, visible_lines, thickness, line_type):
    """Draw black grid lines inside ``bbox`` on ``image`` and return the image.

    Args:
        image: Image passed to ``cv2.line``.
        table_shape: ``(n_rows, n_columns)`` of the table.
        bbox: ``(x, y, w, h)`` of the table area.
        visible_lines: Container or string tested for ``"horizontal"`` and
            ``"vertical"``; only the kinds it contains are drawn.
        thickness: Forwarded to ``cv2.line`` as ``thickness``.
        line_type: Forwarded to ``cv2.line`` as ``lineType``.

    The first line of each kind sits one cell step from the bbox origin, and
    no line is placed at or beyond the far edge, so the outer border is not
    drawn here. If neither kind is requested the image is returned untouched.
    """
    left, top, width, height = bbox
    n_rows, n_columns = table_shape
    # Cell step is the integer cell size plus one pixel.
    column_step = width // n_columns + 1
    row_step = height // n_rows + 1
    black = (0, 0, 0)

    if "horizontal" in visible_lines:
        for row_y in range(top + row_step, top + height, row_step):
            image = cv2.line(
                image,
                (left, row_y),
                (left + width, row_y),
                color=black,
                thickness=thickness,
                lineType=line_type,
            )

    if "vertical" in visible_lines:
        for column_x in range(left + column_step, left + width, column_step):
            image = cv2.line(
                image,
                (column_x, top),
                (column_x, top + height),
                color=black,
                thickness=thickness,
                lineType=line_type,
            )

    return image
|