cv-analysis-service/test/unit_tests/server_pipeline_test.py

86 lines
2.3 KiB
Python

import fitz
import numpy as np
import pytest
from cv_analysis.server.pipeline import (
figure_detection_formatter,
make_analysis_pipeline,
table_parsing_formatter,
)
from cv_analysis.utils.structures import Rectangle
def analysis_fn_mock(image: np.ndarray):
    """Stand-in analysis function that reports one fixed rectangle.

    The ``image`` argument is accepted but never inspected, so the result is
    the same for every page: a single-element list holding the ``Rectangle``
    built from the corner coordinates ``(0, 0, 42, 42)``.
    """
    fixed_corners = (0, 0, 42, 42)
    rectangle = Rectangle.from_xyxy(fixed_corners)
    return [rectangle]
@pytest.fixture
def empty_pdf():
    """Provide a freshly created one-page document in serialized form.

    A new document is opened without a source file, a single page is added
    with default settings, and the value returned by ``doc.write()`` is handed
    to the test.
    """
    doc = fitz.open()
    try:
        doc.new_page()
        return doc.write()
    finally:
        # Release the in-memory document once its serialized form exists;
        # the original fixture left it open.
        doc.close()
@pytest.fixture
def expected_formatted_analysis_result(operation):
    """Provide the formatted pipeline output expected for ``operation``.

    Supported values are ``"table_cells"`` and ``"figure"``; each yields a
    one-element list describing a single page. Any other value falls through
    and yields ``None``.

    NOTE(review): 15.12 equals 42 * 72 / 200, which presumably is the mock's
    42-pixel box converted from 200 dpi to PDF points, and the table-cell
    ``y0`` equals 842 - 15.12, which suggests a flipped y axis for that
    formatter -- confirm against the formatter implementations.
    """
    if operation == "table_cells":
        table_page_info = {
            "number": 1,
            "rotation": 0,
            "width": 595.0,
            "height": 842.0,
        }
        table_cell = {
            "x0": 0.0,
            "y0": 826.8800048828125,
            "x1": 15.119999885559082,
            "y1": 842.0,
            "width": 15.119999885559082,
            "height": 15.1199951171875,
        }
        table_page = {
            "pageInfo": table_page_info,
            "tableCells": [table_cell],
        }
        return [table_page]

    if operation == "figure":
        figure_page_info = {
            "number": 0,
            "rotation": 0,
            "width": 595.0,
            "height": 842.0,
        }
        figure_box = {
            "x0": 0.0,
            "y0": 0.0,
            "x1": 15.12,
            "y1": 15.12,
            "width": 15.12,
            "height": 15.12,
        }
        figure_entry = {
            "pageInfo": figure_page_info,
            "boundingBox": figure_box,
            "alpha": False,
        }
        return [figure_entry]

    # Unknown operations have no expectation defined.
    return None
@pytest.fixture
def formatter(operation):
    """Select the result formatter that corresponds to ``operation``.

    Args:
        operation: ``"table_cells"`` selects ``table_parsing_formatter``;
            ``"figure"`` selects ``figure_detection_formatter``.

    Returns:
        The formatter callable for the requested operation.

    Raises:
        ValueError: If ``operation`` is not one of the supported values.
    """
    if operation == "table_cells":
        return table_parsing_formatter
    if operation == "figure":
        return figure_detection_formatter
    # A bare ``raise`` here has no active exception to re-raise and would only
    # produce an opaque RuntimeError; name the offending value instead.
    raise ValueError(f"Unsupported operation: {operation!r}")
@pytest.mark.parametrize("operation", ["figure"])
def test_analysis_pipeline(empty_pdf, formatter, expected_formatted_analysis_result):
    """Check the pipeline output for the one-page fixture against the expectation.

    The pipeline is built from the mock analysis function and the formatter
    chosen for the parametrized operation, with ``dpi=200`` and
    ``skip_pages_without_images=False``.
    """
    pipeline = make_analysis_pipeline(
        analysis_fn_mock,
        formatter,
        dpi=200,
        skip_pages_without_images=False,
    )
    # Materialize the pipeline output once so it can be compared as a list.
    actual = list(pipeline(empty_pdf))
    assert actual == expected_formatted_analysis_result