64 lines
1.9 KiB
Python
64 lines
1.9 KiB
Python
import gzip
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from pyinfra.storage.proto_data_loader import ProtoDataLoader
|
|
|
|
|
|
@pytest.fixture
def test_data_dir():
    """Locate the ``data`` directory holding the sample files for these tests.

    The location is resolved relative to this module: it is the ``data``
    folder inside the parent of the directory that contains this file.
    """
    base_dir = Path(__file__).parents[1]
    return base_dir / "data"
|
|
|
|
|
|
@pytest.fixture
def document_structure_document(test_data_dir) -> "tuple[Path, bytes]":
    """Provide the DOCUMENT_STRUCTURE sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that contains the sample files.

    Returns:
        The file's ``Path`` and its bytes exactly as stored on disk; the
        content is not decompressed here.
    """
    # The return annotation is quoted so it is never evaluated at import time.
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_STRUCTURE.proto.gz"
    return file_path, file_path.read_bytes()
|
|
|
|
|
|
@pytest.fixture
def document_text_document(test_data_dir) -> "tuple[Path, bytes]":
    """Provide the DOCUMENT_TEXT sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that contains the sample files.

    Returns:
        The file's ``Path`` and its bytes exactly as stored on disk; the
        content is not decompressed here.
    """
    # The return annotation is quoted so it is never evaluated at import time.
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_TEXT.proto.gz"
    return file_path, file_path.read_bytes()
|
|
|
|
|
|
@pytest.fixture
def document_pages_document(test_data_dir) -> "tuple[Path, bytes]":
    """Provide the DOCUMENT_PAGES sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that contains the sample files.

    Returns:
        The file's ``Path`` and its bytes exactly as stored on disk; the
        content is not decompressed here.
    """
    # The return annotation is quoted so it is never evaluated at import time.
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_PAGES.proto.gz"
    return file_path, file_path.read_bytes()
|
|
|
|
|
|
@pytest.fixture
def document_position_document(test_data_dir) -> "tuple[Path, bytes]":
    """Provide the DOCUMENT_POSITION sample file as a ``(path, content)`` pair.

    Args:
        test_data_dir: Directory that contains the sample files.

    Returns:
        The file's ``Path`` and its bytes exactly as stored on disk; the
        content is not decompressed here.
    """
    # The return annotation is quoted so it is never evaluated at import time.
    file_path = test_data_dir / "72ea04dfdbeb277f37b9eb127efb0896.DOCUMENT_POSITION.proto.gz"
    return file_path, file_path.read_bytes()
|
|
|
|
|
|
@pytest.fixture
def proto_data_loader():
    """Provide a newly constructed ``ProtoDataLoader`` built with no arguments."""
    loader = ProtoDataLoader()
    return loader
|
|
|
|
|
|
@pytest.mark.parametrize(
    "document_fixture",
    [
        "document_structure_document",
        "document_text_document",
        "document_pages_document",
        "document_position_document",
    ],
)
def test_proto_data_loader(document_fixture, request, proto_data_loader):
    """Check that the loader turns each sample document into a dict or a list.

    Args:
        document_fixture: Name of the fixture supplying a ``(path, bytes)``
            pair; it is resolved at run time through ``request``.
        request: pytest's built-in request object, used to look the
            fixture up by name.
        proto_data_loader: Callable under test, invoked with the file path
            and the decompressed payload.
    """
    file_path, compressed = request.getfixturevalue(document_fixture)
    # The sample files are stored gzip-compressed; the loader is handed the
    # decompressed payload.
    decompressed = gzip.decompress(compressed)
    loaded_data = proto_data_loader(file_path, decompressed)

    # TODO: Right now, we don't have access to proto-json pairs to compare the loaded data with the expected data.
    # If this becomes available, please update this test to compare the loaded data with the expected data.
    assert isinstance(loaded_data, (dict, list))
|