"""Defines utilities for different operations on files."""

import gzip
import logging
import os
import shutil
import tempfile
from operator import itemgetter


def provide_directory(path):
    """Make sure a directory is available at ``path``.

    Missing directories (including intermediate ones) are created. The call is
    a no-op when ``path`` is empty (interpreted as the current working
    directory) or names an existing regular file, since the directory holding
    an existing file necessarily exists already.

    Args:
        path: Directory to create, or path of an existing file.

    Raises:
        OSError: If the directory cannot be created, e.g. because a path
            component is a file or permissions are insufficient.
    """
    # Guarding against "" matters: os.makedirs("") raises FileNotFoundError,
    # which is what happened for existing files referenced by bare name.
    if not path or os.path.isfile(path):
        return
    # exist_ok=True also covers another process creating the directory
    # between any existence check and the creation itself.
    os.makedirs(path, exist_ok=True)


def produce_compressed_storage_pdf_object_name(path_no_ext, ext="pdf"):
    """Build the storage object name for a compressed original file.

    Args:
        path_no_ext: Object path without any file extension.
        ext: Extension of the uncompressed file, without the leading dot.

    Returns:
        ``"<path_no_ext>.ORIGIN.<ext>.gz"``.
    """
    return f"{path_no_ext}.ORIGIN.{ext}.gz"


def dossier_id_and_file_id_to_compressed_storage_pdf_object_name(dossier_id, file_id):
    """Build the compressed PDF object name from a dossier id and a file id.

    The two ids are joined with ``os.path.join`` before the
    ``.ORIGIN.pdf.gz`` suffix is appended.
    """
    return produce_compressed_storage_pdf_object_name(os.path.join(dossier_id, file_id))


def path_to_compressed_storage_pdf_object_name(path):
    """Build the compressed PDF object name from a file path.

    The last extension of ``path`` (if any) is replaced by ``.ORIGIN.pdf.gz``.
    """
    path_no_ext, _ = os.path.splitext(path)
    return produce_compressed_storage_pdf_object_name(path_no_ext)


def _decompress_into_dossier_dir(gz_path, pdf_dir):
    """Decompress ``gz_path`` into ``<pdf_dir>/<parent dir name of gz_path>/``."""
    path_no_gz, ext = os.path.splitext(gz_path)
    # Validate before touching the file system. Raised explicitly (instead of
    # an ``assert`` statement) so the check survives ``python -O`` while the
    # exception type stays the same for callers.
    if ext != ".gz":
        raise AssertionError(f"expected a '.gz' file, got {gz_path!r}")

    # The name of the directory containing the archive is used as dossier id.
    dossier_id = os.path.basename(os.path.dirname(gz_path))
    target_dir = os.path.join(pdf_dir, dossier_id)
    provide_directory(target_dir)
    target_path = os.path.join(target_dir, os.path.basename(path_no_gz))

    logging.debug("unzipping %s into %s", gz_path, target_path)
    with gzip.open(gz_path, "rb") as f_in, open(target_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
    logging.debug("unzipped %s into %s", gz_path, target_path)
    return target_path


def unzip(gz_path, pdf_dir):
    """Decompress a gzip file and delete the directory it came from.

    The decompressed file is written to
    ``<pdf_dir>/<name of gz_path's parent directory>/<gz file name without .gz>``.

    WARNING: the directory containing ``gz_path`` is removed recursively
    afterwards, whether or not decompression succeeded.

    Args:
        gz_path: Path of the ``.gz`` file to decompress.
        pdf_dir: Root directory below which the decompressed file is stored.

    Returns:
        Path of the decompressed file.

    Raises:
        AssertionError: If ``gz_path`` does not end in ``.gz``.
        OSError: If reading, writing or the final directory removal fails.
    """
    try:
        unzipped_file_path = _decompress_into_dossier_dir(gz_path, pdf_dir)
    finally:
        # Cleanup of the source directory happens on success and on failure.
        shutil.rmtree(os.path.dirname(gz_path))
    return unzipped_file_path


def download(storage_client, object_name, target_root_dir):
    """Download ``object_name`` via ``storage_client`` and return the local path.

    Args:
        storage_client: Object exposing
            ``download_file(object_name, target_root_dir=...)``.
        object_name: Name of the object to fetch.
        target_root_dir: Directory passed to the client as download root.

    Returns:
        Whatever ``storage_client.download_file`` returns (used by callers as
        the path of the downloaded file).
    """
    downloaded_file_path = storage_client.download_file(object_name, target_root_dir=target_root_dir)
    logging.debug("Downloaded %s into %s.", object_name, downloaded_file_path)
    return downloaded_file_path


def download_pdf_from_storage_via_request_payload(storage_client, payload: dict, pdf_dir: str):
    """Download and decompress the PDF referenced by a request payload.

    The compressed object is fetched into a temporary directory and
    decompressed below ``pdf_dir``; the temporary directory is discarded
    afterwards.

    Args:
        storage_client: Client passed on to :func:`download`.
        payload: Mapping providing the keys ``"dossierId"`` and ``"fileId"``.
        pdf_dir: Root directory for decompressed PDFs; created if missing.

    Returns:
        Path of the decompressed file.

    Raises:
        KeyError: If ``payload`` lacks ``"dossierId"`` or ``"fileId"``.
    """
    provide_directory(pdf_dir)
    with tempfile.TemporaryDirectory() as pdf_compressed_dir:
        dossier_id, file_id = itemgetter("dossierId", "fileId")(payload)
        object_name = dossier_id_and_file_id_to_compressed_storage_pdf_object_name(dossier_id, file_id)
        downloaded_file_path = download(storage_client, object_name, pdf_compressed_dir)
        # NOTE(review): unzip() removes the parent directory of the downloaded
        # file; this presumably is a per-dossier sub-directory of the
        # temporary directory -- confirm against the storage client.
        unzipped_file_path = unzip(downloaded_file_path, pdf_dir)
    return unzipped_file_path


def get_file_paths(directory):
    """Return the entries of ``directory``, each joined onto ``directory``.

    Entries come in the order reported by ``os.listdir``; sub-directories are
    included and not descended into.
    """
    return [os.path.join(directory, f) for f in os.listdir(directory)]