We now compare the proto-to-JSON conversion output to the expected JSON files. This revealed multiple differences between the files. FIXED: int64 values were cast to strings in Python; we now get proper integers. TODO: Empty fields are omitted by proto, but the JSON files contain them, and the services implementing pyinfra might expect them. We have to test this behaviour and adjust the tests accordingly.
79 lines
2.3 KiB
Python
79 lines
2.3 KiB
Python
import gzip
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from deepdiff import DeepDiff
|
|
|
|
from pyinfra.storage.proto_data_loader import ProtoDataLoader
|
|
|
|
|
|
@pytest.fixture
def test_data_dir():
    """Path to the ``data`` directory beside the directory containing this module."""
    # parents[0] is this module's directory; parents[1] is one level above it.
    two_levels_up = Path(__file__).parents[1]
    return two_levels_up / "data"
|
|
|
|
|
|
@pytest.fixture
def document_data(request, test_data_dir) -> tuple[Path, bytes, dict | list]:
    """Load one proto input file together with its expected JSON counterpart.

    Meant for indirect parametrization: ``request.param`` supplies the
    document type that is embedded in both fixture file names.

    Args:
        request: pytest request object; ``request.param`` is the document type.
        test_data_dir: directory that contains the fixture files.

    Returns:
        ``(input_file_path, input_data, target_data)`` where ``input_data`` is
        the raw content of the ``.proto.gz`` file (not decompressed here) and
        ``target_data`` is the JSON parsed from the decompressed ``.json.gz``
        file.
    """
    doc_type = request.param
    input_file_path = test_data_dir / f"72ea04dfdbeb277f37b9eb127efb0896.{doc_type}.proto.gz"
    target_file_path = test_data_dir / f"3f9d3d9f255007de8eff13648321e197.{doc_type}.json.gz"

    # The proto payload is handed over as-is; only the JSON target is decoded.
    input_data = input_file_path.read_bytes()
    target_data = json.loads(gzip.decompress(target_file_path.read_bytes()))

    return input_file_path, input_data, target_data
|
|
|
|
|
|
@pytest.fixture
def proto_data_loader():
    """Provide a ``ProtoDataLoader`` constructed without arguments."""
    loader = ProtoDataLoader()
    return loader
|
|
|
|
|
|
@pytest.fixture
def should_match():
    """File names for which the loader's ``_match`` is expected to return a match.

    The names are grouped by naming scheme; the resulting list keeps the
    groups in the order gzipped ``DOCUMENT_*``, plain ``DOCUMENT_*``, then
    gzipped names without the ``DOCUMENT_`` prefix.
    """
    gzipped_with_prefix = [
        "a.DOCUMENT_STRUCTURE.proto.gz",
        "a.DOCUMENT_TEXT.proto.gz",
        "a.DOCUMENT_PAGES.proto.gz",
        "a.DOCUMENT_POSITION.proto.gz",
    ]
    plain_with_prefix = [
        "b.DOCUMENT_STRUCTURE.proto",
        "b.DOCUMENT_TEXT.proto",
        "b.DOCUMENT_PAGES.proto",
        "b.DOCUMENT_POSITION.proto",
    ]
    gzipped_without_prefix = [
        "c.STRUCTURE.proto.gz",
        "c.TEXT.proto.gz",
        "c.PAGES.proto.gz",
        "c.POSITION.proto.gz",
    ]
    return [*gzipped_with_prefix, *plain_with_prefix, *gzipped_without_prefix]
|
|
|
|
|
|
@pytest.mark.xfail(
    reason="FIXME: The test is not stable, but hast to work before we can deploy the code! Right now, we don't have parity between the proto and the json data."
)
@pytest.mark.parametrize(
    "document_data", ["DOCUMENT_STRUCTURE", "DOCUMENT_TEXT", "DOCUMENT_POSITION", "DOCUMENT_PAGES"], indirect=True
)
def test_proto_data_loader_end2end(document_data, proto_data_loader):
    """Compare the loader output for a proto file with the expected JSON data.

    The gzip-compressed proto payload is decompressed, passed to the loader
    together with its file path, and the result is compared to the target
    with ``DeepDiff`` (list order ignored). Currently marked ``xfail``.
    """
    import tempfile  # local import: only needed by the temporary debug dump below

    file_path, data, target = document_data
    data = gzip.decompress(data)
    loaded_data = proto_data_loader(file_path, data)

    diff = DeepDiff(loaded_data, target, ignore_order=True)

    # FIXME: remove this block when the test is stable
    # One dump per input file, so parametrized cases do not overwrite each
    # other; the system temp dir is used instead of a hard-coded "/tmp".
    diff_path = Path(tempfile.gettempdir()) / f"diff.{Path(file_path).name}.json"
    if diff:
        diff_path.write_text(diff.to_json(indent=2), encoding="utf-8")

    assert not diff, f"loader output differs from target; diff written to {diff_path}"
|
|
|
|
|
|
def test_proto_data_loader_unknown_document_type(proto_data_loader):
    """The loader must return a falsy value for this file name and an empty payload."""
    result = proto_data_loader("unknown_document_type.proto", b"")
    assert not result
|
|
|
|
|
|
def test_proto_data_loader_file_name_matching(proto_data_loader, should_match):
    """Every name supplied by ``should_match`` must produce a non-``None`` ``_match`` result."""
    # Stops at the first name whose match is None, like the equivalent loop would.
    first_unmatched = next(
        (candidate for candidate in should_match if proto_data_loader._match(candidate) is None),
        None,
    )
    assert first_unmatched is None
|