cv-analysis-service/test/unit_tests/table_parsing_test.py
Julius Unverfehrt ce9e92876c Pull request #16: Add table parsing fixtures
Merge in RR/cv-analysis from add_table_parsing_fixtures to master

Squashed commit of the following:

commit cfc89b421b61082c8e92e1971c9d0bf4490fa07e
Merge: a7ecb05 73c66a8
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 12:19:01 2022 +0200

    Merge branch 'master' of ssh://git.iqser.com:2222/rr/cv-analysis into add_table_parsing_fixtures

commit a7ecb05b7d8327f0c7429180f63a380b61b06bc3
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 12:02:07 2022 +0200

    refactor

commit 466f217e5a9ee5c54fd38c6acd28d54fc38ff9bb
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Mon Jul 11 10:24:14 2022 +0200

    deleted unused imports and unused lines of code

commit c58955c8658d0631cdd1c24c8556d399e3fd9990
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Mon Jul 11 10:16:01 2022 +0200

    black reformatted files

commit f8bcb10a00ff7f0da49b80c1609b17997411985a
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Jul 5 15:15:00 2022 +0200

    reformat files

commit 432e8a569fd70bd0745ce0549c2bfd2f2e907763
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Tue Jul 5 15:08:22 2022 +0200

    added better test for generic pages with table WIP as thicker lines create inconsistent results.
    added test for patchy tables which does not work yet

commit 2aac9ebf5c76bd963f8c136fe5dd4c2d7681b469
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Mon Jul 4 16:56:29 2022 +0200

    added new fixtures for table_parsing_test.py

commit 37606cac0301b13e99be2c16d95867477f29e7c4
Author: llocarnini <lillian.locarnini@iqser.com>
Date:   Fri Jul 1 16:02:44 2022 +0200

    added separate file for table parsing fixtures, where fixtures for generic tables were added. WIP tests for generic table fixtures
2022-07-11 12:25:16 +02:00

63 lines
2.4 KiB
Python

from itertools import starmap
import cv2
import pytest
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.test_metrics import compute_document_score
@pytest.mark.parametrize("score_threshold", [0.95])
@pytest.mark.parametrize("test_file_index", range(1, 11))
def test_table_parsing_on_client_pages(
    score_threshold, client_page_with_table, expected_table_annotation, test_file_index
):
    """Check table parsing on client pages against the expected annotation.

    The cells found by ``parse_tables`` are serialized with ``json_xywh`` and
    wrapped into a single-page document, which ``compute_document_score``
    compares with ``expected_table_annotation``. The score, rounded to three
    decimals, must reach ``score_threshold``.
    """
    detected_cells = [cell.json_xywh() for cell in parse_tables(client_page_with_table)]
    page_entry = {"page": str(test_file_index), "cells": detected_cells}
    document = {"pages": [page_entry]}

    score = compute_document_score(document, expected_table_annotation)

    assert round(score, 3) >= score_threshold
@pytest.fixture
def error_tolerance(line_thickness):
    """Return the largest accepted average error: seven times ``line_thickness``."""
    tolerance_factor = 7
    return line_thickness * tolerance_factor
@pytest.mark.parametrize("line_thickness", [1, 2, 3])
@pytest.mark.parametrize("line_type", [cv2.LINE_4, cv2.LINE_AA, cv2.LINE_8])
@pytest.mark.parametrize("table_style", ["closed horizontal vertical", "open horizontal vertical"])
@pytest.mark.parametrize("n_tables", [1, 2])
@pytest.mark.parametrize("background_color", [255, 220])
@pytest.mark.parametrize("table_shape", [(5, 8)])
def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with_table, error_tolerance):
    """Check table parsing on generated pages against the gold rectangles.

    The test passes when the parsed ``xywh`` rectangles equal the expected
    ones, or when their average error does not exceed ``error_tolerance``.
    """
    parsed_rects = [cell.xywh() for cell in parse_tables(page_with_table)]
    exact_match = parsed_rects == expected_gold_page_with_table

    # `or` short-circuits, so the average error is only computed when the
    # exact comparison did not succeed.
    assert exact_match or average_error(parsed_rects, expected_gold_page_with_table) <= error_tolerance
@pytest.mark.parametrize("line_thickness", [1, 2, 3])
@pytest.mark.parametrize("line_type", [cv2.LINE_4, cv2.LINE_AA, cv2.LINE_8])
@pytest.mark.parametrize("table_style", ["closed horizontal vertical", "open horizontal vertical"])
@pytest.mark.parametrize("n_tables", [1, 2])
@pytest.mark.parametrize("background_color", [255, 220])
@pytest.mark.parametrize("table_shape", [(5, 8)])
@pytest.mark.xfail
def test_bad_qual_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
    """Check table parsing on pages with a patchy table; marked as expected to fail.

    Uses the same acceptance rule as the generic-page test: the parsed
    ``xywh`` rectangles must equal the expected ones, or their average error
    must stay within ``error_tolerance``.
    """
    parsed_rects = [cell.xywh() for cell in parse_tables(page_with_patchy_table)]
    exact_match = parsed_rects == expected_gold_page_with_table

    # `or` short-circuits, so the average error is only computed when the
    # exact comparison did not succeed.
    assert exact_match or average_error(parsed_rects, expected_gold_page_with_table) <= error_tolerance
def average_error(result, expected):
    """Return the mean per-rectangle deviation between ``result`` and ``expected``.

    Rectangles are paired by position and each pair is scored with
    ``calc_rect_diff``; the scores are averaged over the number of expected
    rectangles.

    Args:
        result: Sequence of rectangles (each a sequence of numbers) to check.
        expected: Sequence of reference rectangles.

    Returns:
        The average deviation; ``float("inf")`` when the two sequences differ
        in length; ``0.0`` when both are empty.
    """
    expected_count = len(expected)

    # zip() would silently drop unmatched rectangles, so a parser that finds
    # too few (or no) rectangles would otherwise be scored as a perfect match.
    if len(result) != expected_count:
        return float("inf")

    # Nothing expected and nothing found: avoid dividing by zero.
    if expected_count == 0:
        return 0.0

    return sum(starmap(calc_rect_diff, zip(result, expected))) / expected_count


def calc_rect_diff(rect1, rect2):
    """Return the sum of absolute element-wise differences of two rectangles."""
    return sum(abs(c1 - c2) for c1, c2 in zip(rect1, rect2))