import fitz
import numpy as np
import pytest

from cv_analysis.server.pipeline import (
    figure_detection_formatter,
    make_analysis_pipeline,
    table_parsing_formatter,
)
from cv_analysis.utils.structures import Rectangle


def analysis_fn_mock(image: np.ndarray):
    """Stand-in analysis function for the pipeline under test.

    The ``image`` argument is ignored; a single fixed rectangle spanning
    (0, 0) to (42, 42) is returned on every call so the formatted output
    is fully predictable.
    """
    bbox = (0, 0, 42, 42)
    return [Rectangle.from_xyxy(bbox)]


@pytest.fixture
def empty_pdf():
    """Return the output of ``doc.write()`` for a new document with one blank page."""
    doc = fitz.open()
    doc.new_page()
    return doc.write()


@pytest.fixture
def expected_formatted_analysis_result(operation):
    """Return the pipeline output expected for the parametrized ``operation``.

    Raises:
        ValueError: if ``operation`` is neither ``"table"`` nor ``"figure"``.
    """
    # NOTE(review): 15.12 matches 42 * 72 / 200, i.e. the mocked 42-unit box
    # rescaled from the 200 dpi used in the test to a 72-per-inch page space;
    # the table expectation additionally has y0 == 842 - 15.12, which looks
    # like a flipped y axis -- confirm against the formatters.
    if operation == "table":
        return [
            {
                "pageInfo": {"number": 1, "rotation": 0, "width": 595.0, "height": 842.0},
                "tableCells": [
                    {
                        "x0": 0.0,
                        "y0": 826.8800048828125,
                        "x1": 15.119999885559082,
                        "y1": 842.0,
                        "width": 15.119999885559082,
                        "height": 15.1199951171875,
                    }
                ],
            }
        ]
    if operation == "figure":
        # NOTE(review): the page number here is 0 while the table expectation
        # uses 1 -- kept exactly as in the original expectations.
        return [
            {
                "pageInfo": {"number": 0, "rotation": 0, "width": 595.0, "height": 842.0},
                "boundingBox": {"x0": 0.0, "y0": 0.0, "x1": 15.12, "y1": 15.12, "width": 15.12, "height": 15.12},
                "alpha": False,
            }
        ]
    raise ValueError(f"Unknown operation: {operation!r}")


@pytest.fixture
def formatter(operation):
    """Return the formatter callable matching the parametrized ``operation``.

    Raises:
        ValueError: if ``operation`` is neither ``"table"`` nor ``"figure"``.
    """
    if operation == "table":
        return table_parsing_formatter
    if operation == "figure":
        return figure_detection_formatter
    # A bare ``raise`` with no active exception would only produce an opaque
    # RuntimeError; name the offending value instead.
    raise ValueError(f"Unknown operation: {operation!r}")


@pytest.mark.parametrize("operation", ["table", "figure"])
def test_analysis_pipeline(empty_pdf, formatter, expected_formatted_analysis_result):
    """Run the pipeline on a one-page blank PDF and compare with the expected output."""
    analysis_pipeline = make_analysis_pipeline(
        analysis_fn_mock,
        formatter,
        dpi=200,
        skip_pages_without_images=False,
    )
    # Materialize the pipeline output so it can be compared as a list.
    results = list(analysis_pipeline(empty_pdf))
    assert results == expected_formatted_analysis_result