chore: cleanup test

This commit is contained in:
Jonathan Kössler 2024-09-23 16:43:59 +02:00
parent a81f1bf31a
commit 09d39930e7

View File

@ -1,9 +1,7 @@
import gzip
import json
import difflib
from pathlib import Path
from google.protobuf import json_format
import pytest
from deepdiff import DeepDiff
@ -19,10 +17,10 @@ def test_data_dir():
@pytest.fixture
def document_data(request, test_data_dir) -> (str, bytes, dict | list):
doc_type = request.param
input_file_path = test_data_dir / f"72ea04dfdbeb277f37b9eb127efb0896.{doc_type}.proto.gz"
# input_file_path = test_data_dir / f"6ff38b030fa131e8e39bf5598513f981.{doc_type}.proto.gz" # new proto schema
# input_file_path = test_data_dir / f"8d1e6798a2c5dc14869e5b3ad8ae501f.{doc_type}.proto.gz"
target_file_path = test_data_dir / f"3f9d3d9f255007de8eff13648321e197.{doc_type}.json.gz"
# Search for relevant doc_type file pairs - there should be one proto and one json file per document type
input_file_path = next(test_data_dir.glob(f"*.{doc_type}.proto.gz"), None)
target_file_path = next(test_data_dir.glob(f"*.{doc_type}.json.gz"), None)
input_data = input_file_path.read_bytes()
target_data = json.loads(gzip.decompress(target_file_path.read_bytes()))
@ -63,26 +61,17 @@ def test_proto_data_loader_end2end(document_data, proto_data_loader):
data = gzip.decompress(data)
loaded_data = proto_data_loader(file_path, data)
# proto_json = json_format.MessageToJson(loaded_data)
loaded_data_str = json.dumps(loaded_data, sort_keys=True)
target_str = json.dumps(target, sort_keys=True)
# diff = difflib.unified_diff(loaded_data_str.splitlines(), target_str.splitlines())
# for line in diff:
# print(line)
# diff = DeepDiff(loaded_data, target, ignore_order=True)
# print(diff)
diff = DeepDiff(sorted(loaded_data_str), sorted(target_str), ignore_order=True)
diff = DeepDiff(loaded_data_str, target_str, ignore_order=True)
# diff = DeepDiff(sorted(loaded_data_str), sorted(target_str), ignore_order=True)
# FIXME: remove this block when the test is stable
# if diff:
# print(diff.to_json(indent=2))
# with open(f"diff_{file_path}.json", "w") as f:
# f.write(diff.to_json(indent=2))
# with open(f"diff_test.json", "w") as f:
# f.write(diff.to_json(indent=4))
assert not diff