cv-analysis-service/test/unit_tests/server_pipeline_test.py
Julius Unverfehrt 309ae0d57b Pull request #27: Image service compat
Merge in RR/cv-analysis from image-service-compat to master

Squashed commit of the following:

commit 397d12a96a6b78de762f7b3a80a72427f5f51e97
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Aug 16 16:14:40 2022 +0200

    update pdf2image, adjust response format for table-parsing & figure-detection

commit f2061bda8d25d64de974e97f36148dea29af50d9
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Aug 15 08:56:39 2022 +0200

    add script to save figure detection data that can be used for image-service pipeline script
2022-08-16 17:04:05 +02:00

56 lines
1.6 KiB
Python

import fitz
import numpy as np
import pytest
from cv_analysis.server.pipeline import table_parsing_formatter, figure_detection_formatter, make_analysis_pipeline
from cv_analysis.utils.structures import Rectangle
def analysis_fn_mock(image: np.ndarray):
    """Stand-in analysis function that ignores its input.

    Regardless of the ``image`` passed in, the result is a one-element list
    holding the ``Rectangle`` built from the fixed corner coordinates
    ``(0, 0, 42, 42)``.
    """
    return [Rectangle.from_xyxy((0, 0, 42, 42))]
@pytest.fixture
def empty_pdf():
    """Open a fresh fitz document, add one page, and return its ``write()`` output.

    NOTE(review): ``write()`` presumably yields the PDF as in-memory bytes --
    confirm against the installed PyMuPDF version.
    """
    blank_document = fitz.open()
    # Exactly one page is needed, so a single call replaces the one-iteration loop.
    blank_document.new_page()
    return blank_document.write()
@pytest.fixture
def expected_formatted_analysis_result(operation):
    """Expected formatted result for the requested ``operation``.

    ``"table"`` yields a single page entry with a ``tableCells`` list,
    ``"figure"`` yields a single entry with a ``boundingBox`` and ``alpha``.
    Any other value falls through and yields ``None``.

    NOTE(review): 15.12 equals 42 * 72 / 200, i.e. it looks like the mock's
    42-pixel box converted at the test's dpi=200 -- confirm against the
    formatter implementation.
    """
    page_info = {"number": 0, "rotation": 0, "width": 595.0, "height": 842.0}
    box = {"x0": 0.0, "y0": 0.0, "x1": 15.12, "y1": 15.12, "width": 15.12, "height": 15.12}

    if operation == "table":
        return [{"pageInfo": page_info, "tableCells": [box]}]
    if operation == "figure":
        return [{"pageInfo": page_info, "boundingBox": box, "alpha": False}]
    return None
@pytest.fixture
def formatter(operation):
    """Select the response formatter matching the parametrized ``operation``.

    Returns ``table_parsing_formatter`` for ``"table"`` and
    ``figure_detection_formatter`` for ``"figure"``.

    Raises:
        ValueError: if ``operation`` is any other value.
    """
    if operation == "table":
        return table_parsing_formatter
    if operation == "figure":
        return figure_detection_formatter
    # A bare ``raise`` with no active exception only produces a generic
    # "RuntimeError: No active exception to reraise"; name the bad value instead.
    raise ValueError(f"Unsupported operation: {operation!r}")
@pytest.mark.parametrize("operation", ["table", "figure"])
def test_analysis_pipeline(empty_pdf, formatter, expected_formatted_analysis_result):
    """Run the pipeline built from the mock analysis function and compare its output.

    The pipeline is created with the operation-specific formatter at dpi=200,
    fed the ``empty_pdf`` fixture, and its collected results must equal the
    expected formatted structure for that operation.
    """
    pipeline = make_analysis_pipeline(analysis_fn_mock, formatter, dpi=200)
    actual = list(pipeline(empty_pdf))
    assert actual == expected_formatted_analysis_result