feat: streamline download to always include the filename of the downloaded file

This commit is contained in:
Julius Unverfehrt 2025-01-16 08:27:47 +01:00
parent ff7547e2c6
commit 044ea6cf0a
2 changed files with 101 additions and 7 deletions

View File

@ -1,6 +1,7 @@
import gzip
import json
from functools import singledispatch
from typing import TypedDict
from kn_utils.logging import logger
from pydantic import BaseModel, ValidationError
@ -58,12 +59,20 @@ class TargetResponseFilePathUploadPayload(BaseModel):
responseFilePath: str
def download_data_bytes_as_specified_in_message(storage: Storage, raw_payload: dict) -> dict[str, bytes] | bytes:
class DownloadedData(TypedDict):
    """Result of a single download: the raw content plus the path it was read from."""

    # Content of the downloaded object.
    data: bytes
    # Storage path the content was downloaded from.
    file_path: str
def download_data_bytes_as_specified_in_message(
storage: Storage, raw_payload: dict
) -> dict[str, DownloadedData] | DownloadedData:
"""Convenience function to download a file specified in a message payload.
Supports both legacy and new payload formats. Also supports downloading multiple files at once, which should
be specified in a dictionary under the 'targetFilePath' key with the file path as value.
In all cases, the content will be returned as is (-> bytes).
The data is downloaded as bytes and returned as a dictionary with the keys 'data' (the downloaded bytes) and
'file_path' (the path the data was downloaded from). In case of several download targets, a nested dictionary is
returned that maps each key of 'targetFilePath' to such a dictionary.
"""
try:
@ -82,23 +91,25 @@ def download_data_bytes_as_specified_in_message(storage: Storage, raw_payload: d
@singledispatch
def _download(
    file_path_or_file_path_dict: str | dict[str, str], storage: Storage
) -> dict[str, DownloadedData] | DownloadedData:
    """Dispatch a download on the type of the first argument.

    Implementations are registered for ``str`` (one file) and ``dict`` (several files). This fallback is
    only reached for any other type and fails loudly instead of silently returning ``None``.

    Raises:
        TypeError: if the first argument is neither a ``str`` nor a ``dict``.
    """
    raise TypeError(
        f"Unsupported download target of type {type(file_path_or_file_path_dict).__name__!r}; "
        "expected a file path (str) or a dictionary of file paths."
    )
@_download.register(str)
def _download_single_file(file_path: str, storage: Storage) -> DownloadedData:
    """Download one object from storage and return it together with its path.

    Returns:
        A ``DownloadedData`` holding the downloaded content under ``data`` and ``file_path`` unchanged.

    Raises:
        FileNotFoundError: if ``storage.exists(file_path)`` reports that the object is missing.
    """
    if not storage.exists(file_path):
        raise FileNotFoundError(f"File '{file_path}' does not exist in storage.")

    data = storage.get_object(file_path)
    logger.info(f"Downloaded {file_path} from storage.")

    return DownloadedData(data=data, file_path=file_path)
@_download.register(dict)
def _download_multiple_files(file_path_dict: dict, storage: Storage) -> dict[str, DownloadedData]:
    """Download every target in ``file_path_dict`` and return the results under the same keys.

    Each value is passed back through ``_download``, so the result for a key is whatever the
    implementation registered for that value's type returns.
    """
    return {key: _download(value, storage) for key, value in file_path_dict.items()}

View File

@ -0,0 +1,83 @@
import json
import pytest
from unittest.mock import patch
from pyinfra.storage.utils import (
download_data_bytes_as_specified_in_message,
upload_data_as_specified_in_message,
DownloadedData,
)
from pyinfra.storage.storages.storage import Storage
@pytest.fixture
def mock_storage():
    """Yield an instance of the mocked ``Storage`` class; the patch is active for the duration of the test."""
    with patch("pyinfra.storage.utils.Storage") as patched_storage_cls:
        storage_instance = patched_storage_cls()
        yield storage_instance
# Each case pairs a raw message payload with the download result expected for it:
# a payload made of id/extension fields, a single target path, and a dictionary of target paths.
_PAYLOAD_CASES = [
    {
        "raw_payload": {
            "tenantId": "tenant1",
            "dossierId": "dossier1",
            "fileId": "file1",
            "targetFileExtension": "txt",
            "responseFileExtension": "json",
        },
        "expected_result": {
            "data": b'{"key": "value"}',
            "file_path": "tenant1/dossier1/file1.txt",
        },
    },
    {
        "raw_payload": {
            "targetFilePath": "some/path/to/file.txt.gz",
            "responseFilePath": "some/path/to/file.json",
        },
        "expected_result": {
            "data": b'{"key": "value"}',
            "file_path": "some/path/to/file.txt.gz",
        },
    },
    {
        "raw_payload": {
            "targetFilePath": {
                "file1": "some/path/to/file1.txt.gz",
                "file2": "some/path/to/file2.txt.gz",
            },
            "responseFilePath": "some/path/to/file.json",
        },
        "expected_result": {
            "file1": {
                "data": b'{"key": "value"}',
                "file_path": "some/path/to/file1.txt.gz",
            },
            "file2": {
                "data": b'{"key": "value"}',
                "file_path": "some/path/to/file2.txt.gz",
            },
        },
    },
]


@pytest.fixture(params=_PAYLOAD_CASES)
def payload_and_expected_result(request):
    """Provide one payload/expected-result pair per parametrized run."""
    return request.param
def test_download_data_bytes_as_specified_in_message(mock_storage, payload_and_expected_result):
    """The download returns data and file path for every target and requests each path from storage."""
    raw_payload = payload_and_expected_result["raw_payload"]
    expected_result = payload_and_expected_result["expected_result"]
    mock_storage.get_object.return_value = b'{"key": "value"}'

    result = download_data_bytes_as_specified_in_message(mock_storage, raw_payload)

    assert isinstance(result, dict)
    assert result == expected_result

    # A single download is a flat {"data", "file_path"} dict; several downloads nest one such dict per key.
    if "file_path" in expected_result:
        expected_paths = [expected_result["file_path"]]
    else:
        expected_paths = [entry["file_path"] for entry in expected_result.values()]

    # Checking the requested paths catches a download that reads the wrong object.
    mock_storage.get_object.assert_called()
    for expected_path in expected_paths:
        mock_storage.get_object.assert_any_call(expected_path)
def test_upload_data_as_specified_in_message(mock_storage, payload_and_expected_result):
    """Uploading for any of the supported payloads writes to storage exactly once."""
    payload = payload_and_expected_result["raw_payload"]
    content = {"key": "value"}

    upload_data_as_specified_in_message(mock_storage, payload, content)

    mock_storage.put_object.assert_called_once()