# cv-analysis-service/test/unit_tests/table_parsing_test.py

from itertools import starmap

import cv2
import pytest

from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.test_metrics import compute_document_score


@pytest.mark.xfail(reason="Azure Connection String is not set and cannot be found. Where is it hiding?")
@pytest.mark.parametrize("score_threshold", [0.95])
@pytest.mark.parametrize("test_file_index", range(1, 11))
def test_table_parsing_on_client_pages(
    score_threshold, client_page_with_table, expected_table_annotation, test_file_index
):
    """Check the table-parsing score on client pages against a threshold.

    The rectangles returned by ``parse_tables`` are serialized with
    ``json_xywh()``, wrapped into a single-page document structure and scored
    against ``expected_table_annotation`` via ``compute_document_score``.
    The score, rounded to three decimals, must reach ``score_threshold``.

    Currently marked ``xfail`` (see the decorator's reason).
    NOTE(review): the fixtures are presumably defined in a conftest module
    that is not visible from this file -- confirm.
    """
    parsed_cells = [rectangle.json_xywh() for rectangle in parse_tables(client_page_with_table)]

    # The scorer receives a document with one page, keyed by the file index as a string.
    page_entry = {"page": str(test_file_index), "cells": parsed_cells}
    document = {"pages": [page_entry]}

    document_score = compute_document_score(document, expected_table_annotation)

    assert round(document_score, 3) >= score_threshold


@pytest.fixture
def error_tolerance(line_thickness):
    """Return the maximum accepted average rectangle error for a line thickness.

    The tolerance grows linearly with ``line_thickness`` (factor 7).
    """
    tolerance_factor = 7
    allowed_error = line_thickness * tolerance_factor
    return allowed_error


@pytest.mark.parametrize("line_thickness", [1, 2, 3])
@pytest.mark.parametrize("line_type", [cv2.LINE_4, cv2.LINE_AA, cv2.LINE_8])
@pytest.mark.parametrize("table_style", ["closed horizontal vertical", "open horizontal vertical"])
@pytest.mark.parametrize("n_tables", [1, 2])
@pytest.mark.parametrize("background_color", [255, 220])
@pytest.mark.parametrize("table_shape", [(5, 8)])
def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with_table, error_tolerance):
    """Check parsed table rectangles on generated pages against the gold rectangles.

    The test passes when the ``xywh()`` values of the parsed rectangles equal
    ``expected_gold_page_with_table`` exactly, or when their average error
    (see ``average_error``) does not exceed ``error_tolerance``.
    """
    detected_rects = [rectangle.xywh() for rectangle in parse_tables(page_with_table)]

    # An exact match needs no tolerance check.
    if detected_rects == expected_gold_page_with_table:
        return

    assert average_error(detected_rects, expected_gold_page_with_table) <= error_tolerance


@pytest.mark.parametrize("line_thickness", [1, 2, 3])
@pytest.mark.parametrize("line_type", [cv2.LINE_4, cv2.LINE_AA, cv2.LINE_8])
@pytest.mark.parametrize("table_style", ["closed horizontal vertical", "open horizontal vertical"])
@pytest.mark.parametrize("n_tables", [1, 2])
@pytest.mark.parametrize("background_color", [255, 220])
@pytest.mark.parametrize("table_shape", [(5, 8)])
@pytest.mark.xfail
def test_bad_qual_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
    """Check parsed table rectangles on the ``page_with_patchy_table`` fixture.

    Uses the same acceptance rule as the generic-page test: the parsed
    ``xywh()`` rectangles must equal the gold rectangles or stay within
    ``error_tolerance`` on average. The test is marked ``xfail`` with no
    reason given.
    """
    detected_rects = [rectangle.xywh() for rectangle in parse_tables(page_with_patchy_table)]
    is_exact_match = detected_rects == expected_gold_page_with_table

    # ``or`` short-circuits, so the error is only computed when the match is not exact.
    assert is_exact_match or average_error(detected_rects, expected_gold_page_with_table) <= error_tolerance


def average_error(result, expected):
    """Return the mean per-rectangle coordinate error between two rectangle lists.

    Rectangles are compared pairwise, in order, using ``calc_rect_diff``.

    Args:
        result: Rectangles produced by the parser, each an iterable of numbers.
        expected: Reference rectangles in the same order and format.

    Returns:
        ``0.0`` when both inputs are empty, ``float("inf")`` when the number
        of rectangles differs, otherwise the sum of the pairwise differences
        divided by the number of expected rectangles.
    """
    result = list(result)

    if len(result) != len(expected):
        # zip() would silently drop the unmatched rectangles, so a parser that
        # finds too few (or no) rectangles would get an artificially low error
        # and pass the tolerance check. Treat a count mismatch as unbounded.
        return float("inf")

    if not expected:
        # Nothing to compare; also avoids dividing by zero.
        return 0.0

    return sum(starmap(calc_rect_diff, zip(result, expected))) / len(expected)


def calc_rect_diff(rect1, rect2):
    """Return the sum of absolute component-wise differences of two rectangles.

    Components are paired positionally with ``zip``, so only the components
    present in both rectangles are compared.
    """
    return sum(abs(c1 - c2) for c1, c2 in zip(rect1, rect2))