feat: streamline download to always include the filename of the downloaded file

This commit is contained in:
Julius Unverfehrt 2025-01-16 08:27:47 +01:00
parent ff7547e2c6
commit 044ea6cf0a
2 changed files with 101 additions and 7 deletions

View File

@ -1,6 +1,7 @@
import gzip import gzip
import json import json
from functools import singledispatch from functools import singledispatch
from typing import TypedDict
from kn_utils.logging import logger from kn_utils.logging import logger
from pydantic import BaseModel, ValidationError from pydantic import BaseModel, ValidationError
@ -58,12 +59,20 @@ class TargetResponseFilePathUploadPayload(BaseModel):
responseFilePath: str responseFilePath: str
class DownloadedData(TypedDict):
    """Result of downloading one file: its content together with its path."""

    # Raw content of the object, as handed back by the storage backend.
    data: bytes
    # Path in storage that the content was read from.
    file_path: str
def download_data_bytes_as_specified_in_message(
storage: Storage, raw_payload: dict
) -> dict[str, DownloadedData] | DownloadedData:
"""Convenience function to download a file specified in a message payload. """Convenience function to download a file specified in a message payload.
Supports both legacy and new payload formats. Also supports downloading multiple files at once, which should Supports both legacy and new payload formats. Also supports downloading multiple files at once, which should
be specified in a dictionary under the 'targetFilePath' key with the file path as value. be specified in a dictionary under the 'targetFilePath' key with the file path as value.
The data is downloaded as bytes and returned as a dictionary with the file path as key and the data as value.
In all cases, the content will be returned as is (-> bytes). In case of several download targets, a nested dictionary is returned with the same keys and dictionaries with
the file path and data as values.
""" """
try: try:
@ -82,23 +91,25 @@ def download_data_bytes_as_specified_in_message(storage: Storage, raw_payload: d
@singledispatch @singledispatch
def _download(file_path_or_file_path_dict: str | dict[str, str], storage: Storage) -> dict[str, bytes] | bytes: def _download(
file_path_or_file_path_dict: str | dict[str, str], storage: Storage
) -> dict[str, DownloadedData] | DownloadedData:
pass pass
@_download.register(str)
def _download_single_file(file_path: str, storage: Storage) -> DownloadedData:
    """Fetch a single object from storage and pair it with its path.

    Args:
        file_path: Path of the object inside the storage.
        storage: Storage backend queried via ``exists`` and ``get_object``.

    Returns:
        A ``DownloadedData`` mapping holding the fetched content under
        ``"data"`` and the requested path under ``"file_path"``.

    Raises:
        FileNotFoundError: If ``storage.exists`` reports the path as missing.
    """
    file_is_missing = not storage.exists(file_path)
    if file_is_missing:
        raise FileNotFoundError(f"File '{file_path}' does not exist in storage.")

    content = storage.get_object(file_path)
    logger.info(f"Downloaded {file_path} from storage.")

    # A TypedDict is a plain dict at runtime, so a literal builds the same value.
    downloaded: DownloadedData = {"data": content, "file_path": file_path}
    return downloaded
@_download.register(dict)
def _download_multiple_files(file_path_dict: dict, storage: Storage) -> dict[str, DownloadedData]:
    """Download every target in ``file_path_dict``, keeping the original keys.

    Each value is sent back through ``_download``, so it is handled by
    whichever implementation is registered for its type.

    Args:
        file_path_dict: Mapping from caller-chosen keys to download targets.
        storage: Storage backend handed on to each individual download.

    Returns:
        A dict with the same keys as ``file_path_dict`` whose values are the
        corresponding download results.
    """
    downloads: dict[str, DownloadedData] = {}
    for name, target in file_path_dict.items():
        downloads[name] = _download(target, storage)
    return downloads

View File

@ -0,0 +1,83 @@
import json
import pytest
from unittest.mock import patch
from pyinfra.storage.utils import (
download_data_bytes_as_specified_in_message,
upload_data_as_specified_in_message,
DownloadedData,
)
from pyinfra.storage.storages.storage import Storage
@pytest.fixture
def mock_storage():
    """Yield a mock storage instance while ``pyinfra.storage.utils.Storage`` is patched.

    The patch is active for the duration of the test and is undone afterwards,
    including when the test fails.
    """
    storage_patcher = patch("pyinfra.storage.utils.Storage")
    storage_cls = storage_patcher.start()
    try:
        # Calling the patched class returns the mock instance the tests use.
        yield storage_cls()
    finally:
        storage_patcher.stop()
# Content every mocked download in these cases is expected to carry.
_EXPECTED_CONTENT = b'{"key": "value"}'

# Payload that identifies the file via tenant/dossier/file ids plus extensions.
_ID_BASED_CASE = {
    "raw_payload": {
        "tenantId": "tenant1",
        "dossierId": "dossier1",
        "fileId": "file1",
        "targetFileExtension": "txt",
        "responseFileExtension": "json",
    },
    "expected_result": {
        "data": _EXPECTED_CONTENT,
        "file_path": "tenant1/dossier1/file1.txt",
    },
}

# Payload that names one explicit target file path.
_SINGLE_PATH_CASE = {
    "raw_payload": {
        "targetFilePath": "some/path/to/file.txt.gz",
        "responseFilePath": "some/path/to/file.json",
    },
    "expected_result": {
        "data": _EXPECTED_CONTENT,
        "file_path": "some/path/to/file.txt.gz",
    },
}

# Payload that names several target file paths under caller-chosen keys.
_MULTI_PATH_CASE = {
    "raw_payload": {
        "targetFilePath": {
            "file1": "some/path/to/file1.txt.gz",
            "file2": "some/path/to/file2.txt.gz",
        },
        "responseFilePath": "some/path/to/file.json",
    },
    "expected_result": {
        "file1": {
            "data": _EXPECTED_CONTENT,
            "file_path": "some/path/to/file1.txt.gz",
        },
        "file2": {
            "data": _EXPECTED_CONTENT,
            "file_path": "some/path/to/file2.txt.gz",
        },
    },
}


@pytest.fixture(params=[_ID_BASED_CASE, _SINGLE_PATH_CASE, _MULTI_PATH_CASE])
def payload_and_expected_result(request):
    """Provide one case dict with a ``raw_payload`` and its ``expected_result``.

    Parametrized, so every test using this fixture runs once per case.
    """
    case = request.param
    return case
def test_download_data_bytes_as_specified_in_message(mock_storage, payload_and_expected_result):
    """Downloading returns the expected dict (content plus file path) for each payload case."""
    # Every object fetched from the mocked storage carries this content.
    mock_storage.get_object.return_value = b'{"key": "value"}'

    downloaded = download_data_bytes_as_specified_in_message(
        mock_storage,
        payload_and_expected_result["raw_payload"],
    )

    assert isinstance(downloaded, dict)
    assert downloaded == payload_and_expected_result["expected_result"]
    mock_storage.get_object.assert_called()
def test_upload_data_as_specified_in_message(mock_storage, payload_and_expected_result):
    """Uploading stores exactly one object in the storage for each payload case."""
    upload_data_as_specified_in_message(
        mock_storage,
        payload_and_expected_result["raw_payload"],
        {"key": "value"},
    )

    mock_storage.put_object.assert_called_once()