pyinfra/tests/unit_test/proto_data_loader_test.py
Julius Unverfehrt 70d3a210a1 feat: update data loader tests
We now compare the output proto json conversion to expected json files.
This revealed multiple differences between the files.

FIXED: int64 type was cast into string in python. We now get proper
integers

TODO: Empty fields are omitted by proto, but the jsons have them and the
pyinfra implementing services might expect them. We have to test this
behaviour and adjust the tests accordingly.
2024-07-18 12:36:29 +02:00

79 lines
2.3 KiB
Python

import gzip
import json
import tempfile
from pathlib import Path

import pytest
from deepdiff import DeepDiff

from pyinfra.storage.proto_data_loader import ProtoDataLoader
@pytest.fixture
def test_data_dir():
    """Return the ``data`` directory one level above this file's directory."""
    this_file = Path(__file__)
    # parents[0] is the directory holding this file; parents[1] is the one above it.
    return this_file.parents[1] / "data"
@pytest.fixture
def document_data(request, test_data_dir) -> tuple[Path, bytes, dict | list]:
    """Load one proto input file together with its expected JSON target.

    The fixture reads ``request.param`` as the document type and inserts it
    into both file names, so it is meant to be parametrized indirectly.

    Returns:
        A ``(input_file_path, input_data, target_data)`` tuple: the path of
        the ``.proto.gz`` input file, its bytes exactly as read from disk
        (not decompressed here), and the JSON-decoded content of the gzipped
        target file.
    """
    doc_type = request.param

    # NOTE(review): the input and target file names carry different hashes;
    # confirm that both files really describe the same document.
    input_file_path = test_data_dir / f"72ea04dfdbeb277f37b9eb127efb0896.{doc_type}.proto.gz"
    target_file_path = test_data_dir / f"3f9d3d9f255007de8eff13648321e197.{doc_type}.json.gz"

    input_data = input_file_path.read_bytes()
    target_data = json.loads(gzip.decompress(target_file_path.read_bytes()))

    return input_file_path, input_data, target_data
@pytest.fixture
def proto_data_loader():
    """Provide a default-constructed ``ProtoDataLoader`` instance."""
    loader = ProtoDataLoader()
    return loader
@pytest.fixture
def should_match():
    """Return file names that the loader's name matching is expected to accept."""
    # Gzipped files whose type carries the ``DOCUMENT_`` prefix.
    gzipped_prefixed = [
        "a.DOCUMENT_STRUCTURE.proto.gz",
        "a.DOCUMENT_TEXT.proto.gz",
        "a.DOCUMENT_PAGES.proto.gz",
        "a.DOCUMENT_POSITION.proto.gz",
    ]
    # Same types without the ``.gz`` suffix.
    plain_prefixed = [
        "b.DOCUMENT_STRUCTURE.proto",
        "b.DOCUMENT_TEXT.proto",
        "b.DOCUMENT_PAGES.proto",
        "b.DOCUMENT_POSITION.proto",
    ]
    # Gzipped files whose type has no ``DOCUMENT_`` prefix.
    gzipped_short = [
        "c.STRUCTURE.proto.gz",
        "c.TEXT.proto.gz",
        "c.PAGES.proto.gz",
        "c.POSITION.proto.gz",
    ]
    return [*gzipped_prefixed, *plain_prefixed, *gzipped_short]
@pytest.mark.xfail(
    reason="FIXME: The test is not stable, but hast to work before we can deploy the code! Right now, we don't have parity between the proto and the json data."
)
@pytest.mark.parametrize(
    "document_data", ["DOCUMENT_STRUCTURE", "DOCUMENT_TEXT", "DOCUMENT_POSITION", "DOCUMENT_PAGES"], indirect=True
)
def test_proto_data_loader_end2end(document_data, proto_data_loader):
    """Compare the loader output for a proto file with the expected JSON data.

    The gzipped input bytes are decompressed, passed to the loader together
    with the input file path, and the result is compared to the target data
    with ``DeepDiff`` (order-insensitive). The test is currently marked as an
    expected failure.
    """
    file_path, data, target = document_data
    data = gzip.decompress(data)
    loaded_data = proto_data_loader(file_path, data)

    diff = DeepDiff(loaded_data, target, ignore_order=True)

    # FIXME: remove this block when the test is stable
    # One dump per input file in the platform's temp directory, so the
    # parametrized cases do not overwrite each other's diff.
    diff_dump = Path(tempfile.gettempdir()) / f"{file_path.name}.diff.json"
    if diff:
        diff_dump.write_text(diff.to_json(indent=2), encoding="utf-8")

    assert not diff, f"loaded proto data differs from the expected JSON; diff written to {diff_dump}"
def test_proto_data_loader_unknown_document_type(proto_data_loader):
    """The loader yields a falsy result for a file name with an unknown document type."""
    result = proto_data_loader("unknown_document_type.proto", b"")
    assert not result
def test_proto_data_loader_file_name_matching(proto_data_loader, should_match):
    """Every name supplied by ``should_match`` must produce a non-``None`` match."""
    for candidate in should_match:
        matched = proto_data_loader._match(candidate)
        assert matched is not None