pyinfra/tests/unit_test/proto_data_loader_test.py
2024-07-17 13:54:50 +02:00

91 lines
2.7 KiB
Python

import gzip
from pathlib import Path
import pytest
from pyinfra.storage.proto_data_loader import ProtoDataLoader
@pytest.fixture
def test_data_dir() -> Path:
    """Return the ``data`` directory that sits next to this file's parent directory."""
    # parents[0] is the directory containing this file; parents[1] is one level up.
    tests_root = Path(__file__).parents[1]
    return tests_root.joinpath("data")
@pytest.fixture
def document_structure_document(test_data_dir: Path) -> tuple[Path, bytes]:
    """Provide the DOCUMENT_STRUCTURE sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that holds the sample files (pytest fixture).

    Returns:
        The file's path and its bytes exactly as read from disk. Nothing is
        decompressed here; the ``.gz`` suffix suggests the content is gzip data.
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_STRUCTURE.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def document_text_document(test_data_dir: Path) -> tuple[Path, bytes]:
    """Provide the DOCUMENT_TEXT sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that holds the sample files (pytest fixture).

    Returns:
        The file's path and its bytes exactly as read from disk. Nothing is
        decompressed here; the ``.gz`` suffix suggests the content is gzip data.
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_TEXT.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def document_pages_document(test_data_dir: Path) -> tuple[Path, bytes]:
    """Provide the DOCUMENT_PAGES sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that holds the sample files (pytest fixture).

    Returns:
        The file's path and its bytes exactly as read from disk. Nothing is
        decompressed here; the ``.gz`` suffix suggests the content is gzip data.
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_PAGES.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def document_position_document(test_data_dir: Path) -> tuple[Path, bytes]:
    """Provide the DOCUMENT_POSITION sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that holds the sample files (pytest fixture).

    Returns:
        The file's path and its bytes exactly as read from disk. Nothing is
        decompressed here; the ``.gz`` suffix suggests the content is gzip data.
    """
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_POSITION.proto.gz"
    return file_path, file_path.read_bytes()
@pytest.fixture
def proto_data_loader():
    """Return a ``ProtoDataLoader`` built with its default constructor arguments."""
    loader = ProtoDataLoader()
    return loader
@pytest.fixture
def should_match():
    """Return the file names that the loader's matcher is expected to accept.

    The names fall into three groups: ``DOCUMENT_``-prefixed with ``.proto.gz``,
    ``DOCUMENT_``-prefixed with plain ``.proto``, and unprefixed with ``.proto.gz``.
    """
    prefixed_gzipped = [
        "a.DOCUMENT_STRUCTURE.proto.gz",
        "a.DOCUMENT_TEXT.proto.gz",
        "a.DOCUMENT_PAGES.proto.gz",
        "a.DOCUMENT_POSITION.proto.gz",
    ]
    prefixed_plain = [
        "b.DOCUMENT_STRUCTURE.proto",
        "b.DOCUMENT_TEXT.proto",
        "b.DOCUMENT_PAGES.proto",
        "b.DOCUMENT_POSITION.proto",
    ]
    unprefixed_gzipped = [
        "c.STRUCTURE.proto.gz",
        "c.TEXT.proto.gz",
        "c.PAGES.proto.gz",
        "c.POSITION.proto.gz",
    ]
    return prefixed_gzipped + prefixed_plain + unprefixed_gzipped
@pytest.mark.parametrize(
    "document_fixture",
    [
        "document_structure_document",
        "document_text_document",
        "document_pages_document",
        "document_position_document",
    ],
)
def test_proto_data_loader_end2end(document_fixture, request, proto_data_loader):
    """Run each sample document through the loader and check the result's container type."""
    # Fixtures are resolved by name so a single test body covers every document type.
    path, compressed = request.getfixturevalue(document_fixture)
    payload = gzip.decompress(compressed)

    result = proto_data_loader(path, payload)

    # TODO: No proto/JSON pairs are available yet to compare the loaded data against
    # an expected value. Once they are, extend this test to check the content as well.
    assert isinstance(result, (dict, list))
def test_proto_data_loader_unknown_document_type(proto_data_loader):
    """Check that the loader returns a falsy value for an unrecognised file name."""
    result = proto_data_loader("unknown_document_type.proto", b"")
    assert not result
def test_proto_data_loader_file_name_matching(proto_data_loader, should_match):
    """Check that the loader's ``_match`` returns a non-``None`` result for every expected name.

    Every name is checked before asserting, so a failure lists all unmatched
    names instead of stopping at the first one.
    """
    unmatched = [name for name in should_match if proto_data_loader._match(name) is None]
    assert not unmatched, f"file names not matched by the loader: {unmatched}"