From 044ea6cf0a358152c36aa0eb6918862ddd31a18e Mon Sep 17 00:00:00 2001 From: Julius Unverfehrt Date: Thu, 16 Jan 2025 08:27:47 +0100 Subject: [PATCH] feat: streamline download to always include the filename of the downloaded file --- pyinfra/storage/utils.py | 25 +++++--- tests/unit_test/utils_download_test.py | 83 ++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 7 deletions(-) create mode 100644 tests/unit_test/utils_download_test.py diff --git a/pyinfra/storage/utils.py b/pyinfra/storage/utils.py index a40de3d..1d50c3c 100644 --- a/pyinfra/storage/utils.py +++ b/pyinfra/storage/utils.py @@ -1,6 +1,7 @@ import gzip import json from functools import singledispatch +from typing import TypedDict from kn_utils.logging import logger from pydantic import BaseModel, ValidationError @@ -58,12 +59,20 @@ class TargetResponseFilePathUploadPayload(BaseModel): responseFilePath: str -def download_data_bytes_as_specified_in_message(storage: Storage, raw_payload: dict) -> dict[str, bytes] | bytes: +class DownloadedData(TypedDict): + data: bytes + file_path: str + + +def download_data_bytes_as_specified_in_message( + storage: Storage, raw_payload: dict +) -> dict[str, DownloadedData] | DownloadedData: """Convenience function to download a file specified in a message payload. Supports both legacy and new payload formats. Also supports downloading multiple files at once, which should be specified in a dictionary under the 'targetFilePath' key with the file path as value. - - In all cases, the content will be returned as is (-> bytes). + The data is downloaded as bytes and returned as a dictionary with the keys 'data' (the downloaded bytes) and + 'file_path' (the storage path it was downloaded from). In case of several download targets, a nested dictionary is + returned that maps each key of the 'targetFilePath' dictionary to such a dictionary. 
""" try: @@ -82,23 +91,25 @@ def download_data_bytes_as_specified_in_message(storage: Storage, raw_payload: d @singledispatch -def _download(file_path_or_file_path_dict: str | dict[str, str], storage: Storage) -> dict[str, bytes] | bytes: +def _download( + file_path_or_file_path_dict: str | dict[str, str], storage: Storage +) -> dict[str, DownloadedData] | DownloadedData: pass @_download.register(str) -def _download_single_file(file_path: str, storage: Storage) -> bytes: +def _download_single_file(file_path: str, storage: Storage) -> DownloadedData: if not storage.exists(file_path): raise FileNotFoundError(f"File '{file_path}' does not exist in storage.") data = storage.get_object(file_path) logger.info(f"Downloaded {file_path} from storage.") - return data + return DownloadedData(data=data, file_path=file_path) @_download.register(dict) -def _download_multiple_files(file_path_dict: dict, storage: Storage) -> dict[str, bytes]: +def _download_multiple_files(file_path_dict: dict, storage: Storage) -> dict[str, DownloadedData]: return {key: _download(value, storage) for key, value in file_path_dict.items()} diff --git a/tests/unit_test/utils_download_test.py b/tests/unit_test/utils_download_test.py new file mode 100644 index 0000000..b431a54 --- /dev/null +++ b/tests/unit_test/utils_download_test.py @@ -0,0 +1,83 @@ +import json + +import pytest +from unittest.mock import patch +from pyinfra.storage.utils import ( + download_data_bytes_as_specified_in_message, + upload_data_as_specified_in_message, + DownloadedData, +) +from pyinfra.storage.storages.storage import Storage + + +@pytest.fixture +def mock_storage(): + with patch("pyinfra.storage.utils.Storage") as MockStorage: + yield MockStorage() + + +@pytest.fixture( + params=[ + { + "raw_payload": { + "tenantId": "tenant1", + "dossierId": "dossier1", + "fileId": "file1", + "targetFileExtension": "txt", + "responseFileExtension": "json", + }, + "expected_result": { + "data": b'{"key": "value"}', + "file_path": 
"tenant1/dossier1/file1.txt" + } + }, + { + "raw_payload": { + "targetFilePath": "some/path/to/file.txt.gz", + "responseFilePath": "some/path/to/file.json" + }, + "expected_result": { + "data": b'{"key": "value"}', + "file_path": "some/path/to/file.txt.gz" + } + }, + { + "raw_payload": { + "targetFilePath": { + "file1": "some/path/to/file1.txt.gz", + "file2": "some/path/to/file2.txt.gz" + }, + "responseFilePath": "some/path/to/file.json" + }, + "expected_result": { + "file1": { + "data": b'{"key": "value"}', + "file_path": "some/path/to/file1.txt.gz" + }, + "file2": { + "data": b'{"key": "value"}', + "file_path": "some/path/to/file2.txt.gz" + } + } + }, + ] +) +def payload_and_expected_result(request): + return request.param + +def test_download_data_bytes_as_specified_in_message(mock_storage, payload_and_expected_result): + raw_payload = payload_and_expected_result["raw_payload"] + expected_result = payload_and_expected_result["expected_result"] + mock_storage.get_object.return_value = b'{"key": "value"}' + + result = download_data_bytes_as_specified_in_message(mock_storage, raw_payload) + + assert isinstance(result, dict) + assert result == expected_result + mock_storage.get_object.assert_called() + +def test_upload_data_as_specified_in_message(mock_storage, payload_and_expected_result): + raw_payload = payload_and_expected_result["raw_payload"] + data = {"key": "value"} + upload_data_as_specified_in_message(mock_storage, raw_payload, data) + mock_storage.put_object.assert_called_once()