import abc
import gzip
import logging
import os
import posixpath
import shutil
from itertools import repeat
from operator import attrgetter
from typing import Iterable, List, Optional

from mini_queue.utils.file import path_to_compressed_storage_pdf_object_name, provide_directory
from mini_queue.utils.meta import NoAttemptsLeft, max_attempts


class StorageHandle:
    """Storage API base.

    Public methods implement the backend-independent logic and delegate the
    backend-specific work to "private" hook methods (``__add_file``,
    ``__purge``, ``__fget_object``, ...).

    Because the hooks start with two underscores, Python mangles their names
    to ``_StorageHandle__<name>``; a call such as ``self.__add_file(...)``
    inside this class therefore resolves to ``self._StorageHandle__add_file``.
    A subclass has to define the hook under that mangled name to override it;
    a method spelled ``__add_file`` inside the subclass would be mangled with
    the subclass' name and never be called from here.

    Most public methods are wrapped in ``max_attempts`` from
    ``mini_queue.utils.meta`` (presumably a retry decorator; its exact
    semantics are not visible in this file).
    """

    # NOTE(review): the hooks are decorated with ``abc.abstractmethod`` but the
    # class does not derive from ``abc.ABC``, so nothing is enforced at
    # instantiation time. Left unchanged on purpose: switching to ``abc.ABC``
    # would make every subclass that lacks one of the (mangled) hooks fail to
    # instantiate.

    def __init__(self):
        # Container used whenever a method is called with container_name=None.
        self.default_container_name = None

    @abc.abstractmethod
    def __provide_container(self, container_name):
        """Backend hook; the base implementation does nothing."""

    @abc.abstractmethod
    def __add_file(self, path, filename, container_name=None):
        """Backend hook: store the local file ``path`` under the object name ``filename``."""

    @max_attempts(n_attempts=10, max_timeout=60)
    def add_file(self, path: str, folder: Optional[str] = None, container_name: Optional[str] = None) -> None:
        """Adds a file to the store.

        The object name is the base name of ``path``, prefixed with ``folder``
        if one is given.

        Args:
            path: Path to file to add to store.
            folder: Folder to hold file.
            container_name: container to hold file.
        """
        storage_path = self.__storage_path(path, folder=folder)
        self.__add_file(path, storage_path, container_name)

    @max_attempts()
    def _list_files(self, object_name_attr="object_name", container_name=None) -> Iterable[str]:
        """List all files in a container.

        Helper for subclasses implementing ``list_files``.

        Args:
            object_name_attr: Name of the attribute that holds the object name
                on the items returned by ``get_objects``.
            container_name: container to list files from. Falls back to
                ``self.default_container_name`` when None.

        Returns:
            Iterable of filenames (evaluated lazily).
        """
        if container_name is None:
            container_name = self.default_container_name
        return map(attrgetter(object_name_attr), self.get_objects(container_name))

    @abc.abstractmethod
    def list_files(self, container_name=None) -> Iterable[str]:
        """Backend hook: list the object names in a container."""

    @abc.abstractmethod
    def get_objects(self, container_name=None):
        """Backend hook: return the objects stored in a container."""

    @abc.abstractmethod
    def __list_containers(self):
        """Backend hook: return the containers of the store."""

    @max_attempts()
    def get_all_objects(self) -> Iterable:
        """Gets all objects in the store

        Returns:
            Iterable over all objects in the store, as pairs of
            (container, object).
        """
        # NOTE(review): this is a generator function, so calling it only
        # creates the generator; the body runs during iteration. If
        # ``max_attempts`` works by re-calling the function on failure (not
        # visible here), errors raised while iterating are not covered by it.
        for container in self.__list_containers():
            yield from zip(repeat(container), self.get_objects(container.name))

    @abc.abstractmethod
    def __purge(self) -> None:
        """Backend hook behind ``purge``."""

    @max_attempts()
    def purge(self) -> None:
        """Delegates to the backend's ``__purge`` hook."""
        self.__purge()

    def list_files_by_type(self, container_name=None, extension=".pdf.gz"):
        """Lists the files of a container whose names end with ``extension``.

        Args:
            container_name: container to list files from.
            extension: Suffix an object name must end with to be included.

        Returns:
            Lazy iterable of matching object names.
        """
        return (name for name in self.list_files(container_name) if name.endswith(extension))

    @abc.abstractmethod
    def __fget_object(self, *args, **kwargs):
        """Backend hook: download an object to a local file.

        Called by ``download_file`` as
        ``(container_name, object_name, target_path)``.
        """

    @staticmethod
    def __storage_path(path, folder: Optional[str] = None):
        """Builds the object name for a local file.

        Args:
            path: Local path; only its base name is used.
            folder: Optional folder to prefix the base name with.

        Returns:
            ``<folder>/<basename>`` or just ``<basename>`` if no folder is given.
        """
        storage_path = os.path.basename(path)
        if folder is not None:
            # Object names are storage keys, not local paths: always join with
            # "/" (which is also what list_folders_and_files splits on) instead
            # of the OS-specific separator.
            storage_path = posixpath.join(folder, storage_path)
        return storage_path

    @max_attempts()
    def list_folders_and_files(self, container_name: Optional[str] = None) -> Iterable[List[str]]:
        """Lists pairs of folder name (dossier-IDs) and file name (file-IDs) of items in a container.

        Only objects matching the default extension of ``list_files_by_type``
        are considered. Each object name is split on "/", so a name without a
        folder yields a single-element list and a nested name more than two
        elements.

        Args:
            container_name: container to list items for.

        Returns:
            Iterable of pairs folder name (dossier-ID) and file names (file-ID)
        """
        return (name.split("/") for name in self.list_files_by_type(container_name))

    @abc.abstractmethod
    def __remove_file(self, folder: str, filename: str, container_name: str = None) -> None:
        """Backend hook behind ``remove_file``."""

    @max_attempts()
    def remove_file(self, folder: str, filename: str, container_name: Optional[str] = None) -> None:
        """Delegates to the backend's ``__remove_file`` hook.

        Args:
            folder: Folder holding the file.
            filename: Name of the file within the folder.
            container_name: container holding the file.
        """
        self.__remove_file(folder, filename, container_name)

    def add_file_compressed(self, path, folder: Optional[str] = None, container_name: Optional[str] = None) -> None:
        """Adds a file as a .gz archive to the store.

        The file is gzip-compressed into a temporary file (its path is given
        by ``path_to_compressed_storage_pdf_object_name``), uploaded via
        ``add_file`` and the temporary file is removed afterwards, also when
        compressing or uploading fails.

        Args:
            path: Path to file to add to store.
            folder: Folder to hold file.
            container_name: container to hold file.
        """

        def compress(path_in: str, path_out: str) -> None:
            with open(path_in, "rb") as f_in, gzip.open(path_out, "wb") as f_out:
                # Chunked copy; iterating a binary file "by line" would split
                # on b"\n", which is arbitrary for binary content.
                shutil.copyfileobj(f_in, f_out)

        path_gz = path_to_compressed_storage_pdf_object_name(path)
        try:
            compress(path, path_gz)
            self.add_file(path_gz, folder, container_name)
        finally:
            # Never leave the temporary archive behind, even on failure.
            if os.path.exists(path_gz):
                os.unlink(path_gz)

    @max_attempts()
    def download_file(self, object_names: str, target_root_dir: str, container_name: Optional[str] = None) -> str:
        """Downloads a file from the store.

        Args:
            object_names: Complete object name (folder and file). Despite the
                plural name this is a single object name.
            target_root_dir: Root directory to download file into (including its folder).
            container_name: container to load file from. Falls back to
                ``self.default_container_name`` when None.

        Returns:
            str: Path to downloaded file.

        Raises:
            NoAttemptsLeft: Re-raised (after logging) when the inner download
                raises it, presumably once its attempts are used up.
        """
        if container_name is None:
            container_name = self.default_container_name

        @max_attempts(5, exceptions=(FileNotFoundError,))
        def download(object_name: str) -> str:
            # Mirror the object's folder structure below target_root_dir.
            path, basename = os.path.split(object_name)
            target_dir = os.path.join(target_root_dir, path)
            provide_directory(target_dir)
            target_path = os.path.join(target_dir, basename)

            logging.debug("Downloading %s...", object_name)
            try:
                self.__fget_object(container_name, object_name, target_path)
            except Exception:
                logging.error("Downloading %s failed.", object_name)
                raise
            logging.debug("Downloaded %s.", object_name)

            return target_path

        try:
            return download(object_names)
        except NoAttemptsLeft as err:
            logging.error("%s", err)
            raise