diff --git a/.gitignore b/.gitignore
index bfbacf3..393863e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,9 @@
 /.idea/vcs.xml
 /results/
 /data
-/table_parsing.egg-info
\ No newline at end of file
+/table_parsing.egg-info
+/tests/VV-313450.pdf
+/vidocp.egg-info/dependency_links.txt
+/vidocp.egg-info/PKG-INFO
+/vidocp.egg-info/SOURCES.txt
+/vidocp.egg-info/top_level.txt
diff --git a/tests/test_table_parsing.py b/tests/test_table_parsing.py
new file mode 100644
index 0000000..c882d2d
--- /dev/null
+++ b/tests/test_table_parsing.py
@@ -0,0 +1,44 @@
+"""Tests for ``vidocp.table_parsing.parse_table`` against a sample PDF page."""
+from pathlib import Path
+
+import numpy as np
+import pdf2image
+import pytest
+
+from vidocp.table_parsing import parse_table
+
+# The sample PDF is git-ignored, so resolve it next to this test module rather
+# than through one developer's absolute home-directory path.
+PDF_PATH = Path(__file__).resolve().parent / "VV-313450.pdf"
+
+
+@pytest.fixture(scope="module")
+def rects():
+    """Return what ``parse_table`` yields for the first page of the sample PDF.
+
+    Module-scoped so the page is rendered and parsed once for all tests.
+    """
+    if not PDF_PATH.exists():
+        pytest.skip(f"sample PDF not found: {PDF_PATH}")
+    page_index = 0
+    page = pdf2image.convert_from_path(
+        str(PDF_PATH), first_page=page_index + 1, last_page=page_index + 1
+    )[0]
+    return parse_table(np.array(page))
+
+
+def test_num_of_rects(rects):
+    """The sample page should produce exactly 49 rectangles."""
+    assert len(rects) == 49
+
+
+def test_range_of_rects(rects):
+    """Every ``(x, y, w, h)`` rectangle must lie inside the expected region."""
+    (min_x, min_y), (max_x, max_y) = (210, 605), (1430, 1620)
+    # Check each coordinate separately: tuple comparison is lexicographic, so
+    # comparing whole points would ignore y whenever x alone decides the result.
+    for x, y, w, h in rects:
+        assert x >= min_x and y >= min_y, f"top-left out of range: {(x, y, w, h)}"
+        assert x + w <= max_x and y + h <= max_y, (
+            f"bottom-right out of range: {(x, y, w, h)}"
+        )