pyinfra/tests/unit_test/data_loader_test.py
2024-07-16 16:32:58 +02:00

64 lines
1.9 KiB
Python

import gzip
from pathlib import Path
import pytest
from pyinfra.storage.proto_data_loader import ProtoDataLoader
@pytest.fixture
def test_data_dir():
    """Provide the directory that holds the test data files.

    The directory is the ``data`` folder located one level above the
    directory that contains this test module.
    """
    tests_root = Path(__file__).parents[1]
    return tests_root / "data"
@pytest.fixture
def document_structure_document(test_data_dir) -> tuple[Path, bytes]:
    """Load the DOCUMENT_STRUCTURE sample file from the test data directory.

    Args:
        test_data_dir: Directory containing the test data files.

    Returns:
        A ``(file_path, data)`` pair: the path of the sample file and its
        raw bytes exactly as read from disk (not decompressed here).
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_STRUCTURE.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def document_text_document(test_data_dir) -> tuple[Path, bytes]:
    """Load the DOCUMENT_TEXT sample file from the test data directory.

    Args:
        test_data_dir: Directory containing the test data files.

    Returns:
        A ``(file_path, data)`` pair: the path of the sample file and its
        raw bytes exactly as read from disk (not decompressed here).
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_TEXT.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def document_pages_document(test_data_dir) -> tuple[Path, bytes]:
    """Load the DOCUMENT_PAGES sample file from the test data directory.

    Args:
        test_data_dir: Directory containing the test data files.

    Returns:
        A ``(file_path, data)`` pair: the path of the sample file and its
        raw bytes exactly as read from disk (not decompressed here).
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_PAGES.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def document_position_document(test_data_dir) -> tuple[Path, bytes]:
    """Load the DOCUMENT_POSITION sample file from the test data directory.

    Args:
        test_data_dir: Directory containing the test data files.

    Returns:
        A ``(file_path, data)`` pair: the path of the sample file and its
        raw bytes exactly as read from disk (not decompressed here).
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_POSITION.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def proto_data_loader():
    """Provide a ``ProtoDataLoader`` constructed with no arguments."""
    loader = ProtoDataLoader()
    return loader
@pytest.mark.parametrize(
    "document_fixture",
    [
        "document_structure_document",
        "document_text_document",
        "document_pages_document",
        "document_position_document",
    ],
)
def test_proto_data_loader(document_fixture, request, proto_data_loader):
    """Check that the loader returns a dict or a list for each sample document.

    The document fixture is resolved by name through ``request``, its bytes
    are gunzipped, and the loader is called with the file path and the
    decompressed payload.
    """
    file_path, compressed = request.getfixturevalue(document_fixture)
    loaded_data = proto_data_loader(file_path, gzip.decompress(compressed))

    # TODO: Right now, we don't have access to proto-json pairs to compare the loaded data with the expected data.
    # If this becomes available, please update this test to compare the loaded data with the expected data.
    assert isinstance(loaded_data, (dict, list))