# cv-analysis-service/cv_analysis/test/unit_tests/table_test.py

from os.path import join
import json

from cv_analysis.table_parsing import parse_tables
from cv_analysis.locations import TEST_DATA_DIR
from cv_analysis.test.config import TEST_CONFIG
from cv_analysis.utils.test_metrics import compute_document_score
from cv_analysis.utils.preprocessing import open_pdf


def test_table_parsing():
    """Check table-cell detection against the fixtures ``test1`` .. ``test10``.

    For each fixture, ``test{n}.png`` is opened with ``open_pdf``, every
    returned page is passed through ``parse_tables``, and the collected cells
    (serialized via ``json_xywh()``) are scored against the annotation loaded
    from ``test{n}.json`` using ``compute_document_score``. The score, rounded
    to three decimals, must reach ``TEST_CONFIG.table_score_threshold``.
    """
    for doc_index in range(1, 11):
        img_path = join(TEST_DATA_DIR, f"test{doc_index}.png")
        json_path = join(TEST_DATA_DIR, f"test{doc_index}.json")
        pages = open_pdf(img_path)

        # Distinct names for the fixture index and the page index: the page
        # loop must not overwrite the fixture counter.
        result = {
            "pages": [
                {"page": str(page_index), "cells": [cell.json_xywh() for cell in parse_tables(page)]}
                for page_index, page in enumerate(pages)
            ]
        }
        with open(json_path) as f:
            annotation = json.load(f)

        score = compute_document_score(result, annotation)

        # Name the failing fixture so a red run points at the offending file.
        assert round(score, 3) >= TEST_CONFIG.table_score_threshold, (
            f"{img_path}: table score {score} is below threshold {TEST_CONFIG.table_score_threshold}"
        )


# NOTE: The string literal below is a disabled, earlier variant of
# test_table_parsing kept for reference. Because it is a bare string
# expression it is never executed or collected as a test. It differs from the
# active test in that it reads a single fixture pair (table.jpg / table.json),
# serializes cells with ``xywh()`` instead of ``json_xywh()``, and compares
# the unrounded score against the threshold.
"""
def test_table_parsing():

    img_path = join(TEST_DATA_DIR, "table.jpg")
    json_path = join(TEST_DATA_DIR, "table.json")
    pages = open_pdf(img_path)

    result = {"pages": []}
    for i, page in enumerate(pages):
        result["pages"].append({"page": str(i), "cells": [x.xywh() for x in parse_tables(page)]})
    with open(json_path) as f:
        annotation = json.load(f)

    score = compute_document_score(result, annotation)

    assert score >= TEST_CONFIG.table_score_threshold
"""