Merge in RR/mini_queue from add-storage-handle to master
Squashed commit of the following:
commit 03e542d2a65802c28735873fae184209f0c83553
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 11:55:34 2022 +0100
Quickfix typo
commit b4d538e9445187435d87c5cf8ce1f4e448021129
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 11:41:42 2022 +0100
added prefetch count and make channel function
commit d46d1375e387d36641c06b062a8ccc54f114ef4c
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 11:20:39 2022 +0100
black on M.s request
commit bc47b20312a978f19b08531804bf42b00f0a88f0
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 11:19:57 2022 +0100
changed response
commit 9a475ecd8df9ca007e5f7fe146483b6403eccc3b
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 10:15:08 2022 +0100
.
commit 108bc3ea90d867575db8c1b1503c9df859222485
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 09:56:56 2022 +0100
quickrestore
commit ae04d17d8d041f612d86117e8e96c96ddffcbde3
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 09:37:30 2022 +0100
refactor
commit 68051a72eb93868eba8adba234258b9e5373ecaa
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 08:50:59 2022 +0100
added answer file template for rancher
commit 09ef45ead51c07732a20133acad0b8b2ae7d0a61
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 08:26:05 2022 +0100
Quickfix inconsistency
commit d925b0f3f91f29403c88fb6149566ec966af2973
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Wed Feb 16 08:20:40 2022 +0100
Quick refactor
commit 48795455cde8d97ed98e58c3004a87a26f331352
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Tue Feb 15 17:46:45 2022 +0100
bluckckck
commit 80e58efab0269dc513990f83b14ceb36b3e4dd8e
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Tue Feb 15 17:45:49 2022 +0100
Quick restatus setting
commit 83f276ee13348a678b7da84e25ca844dd348b4c9
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Tue Feb 15 17:30:16 2022 +0100
Quickreset to working status
commit d44cdcf922250639a6832cc3e16d0d967d9853fb
Author: Julius Unverfehrt <Julius.Unverfehrt@iqser.com>
Date: Tue Feb 15 14:44:26 2022 +0100
added storage handle for minio WIP
89 lines
2.6 KiB
Python
89 lines
2.6 KiB
Python
"""Defines utilities for different operations on files."""
|
|
|
|
|
|
import gzip
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import tempfile
|
|
from operator import itemgetter
|
|
|
|
|
|
def provide_directory(path):
    """Ensure that a directory exists for ``path``.

    If ``path`` is an existing regular file, its parent directory is the one
    that is ensured. Otherwise ``path`` itself is created as a directory,
    together with any missing parent directories.

    Args:
        path: Directory to create, or an existing file whose parent
            directory should exist.
    """
    if os.path.isfile(path):
        path = os.path.dirname(path)

    # A bare file name has an empty dirname, i.e. the current directory,
    # which already exists; os.makedirs("") would raise FileNotFoundError.
    if path:
        # exist_ok=True makes creation safe when another process creates the
        # same directory between a check and the makedirs call.
        os.makedirs(path, exist_ok=True)
|
|
|
|
|
|
def produce_compressed_storage_pdf_object_name(path_no_ext, ext="pdf"):
    """Build the object name of a compressed original file.

    Args:
        path_no_ext: Object path without any file extension.
        ext: Extension of the uncompressed file, without the leading dot.

    Returns:
        ``path_no_ext`` followed by ``.ORIGIN.<ext>.gz``.
    """
    suffix = f"ORIGIN.{ext}.gz"
    return f"{path_no_ext}.{suffix}"
|
|
|
|
|
|
def dossier_id_and_file_id_to_compressed_storage_pdf_object_name(dossier_id, file_id):
    """Derive the compressed PDF object name from a dossier id and a file id.

    The two ids are joined with ``os.path.join`` and the result is passed to
    ``produce_compressed_storage_pdf_object_name`` with its default extension.

    Args:
        dossier_id: First path component of the object name.
        file_id: Second path component of the object name.

    Returns:
        The object name produced for the joined path.
    """
    # NOTE(review): os.path.join uses the platform path separator; confirm
    # that the storage backend expects the same separator in object names.
    return produce_compressed_storage_pdf_object_name(os.path.join(dossier_id, file_id))
|
|
|
|
|
|
def path_to_compressed_storage_pdf_object_name(path):
    """Derive the compressed PDF object name from a file path.

    The extension of ``path`` is dropped; the resulting name always carries
    the default extension of ``produce_compressed_storage_pdf_object_name``.

    Args:
        path: File path, with or without an extension.

    Returns:
        The object name produced for ``path`` without its extension.
    """
    stem = os.path.splitext(path)[0]
    return produce_compressed_storage_pdf_object_name(stem)
|
|
|
|
|
|
def unzip(gz_path, pdf_dir):
    """Decompress a gzip file below ``pdf_dir`` and remove its source directory.

    The output path is ``<pdf_dir>/<name of gz_path's parent directory>/<file
    name of gz_path without ".gz">``. The directory containing ``gz_path`` is
    deleted afterwards, whether or not decompression succeeded.

    Args:
        gz_path: Path of the gzip-compressed file; must end in ".gz".
        pdf_dir: Root directory below which the decompressed file is written.

    Returns:
        Path of the decompressed file.

    Raises:
        ValueError: If ``gz_path`` does not have a ".gz" extension.
    """
    source_dir = os.path.dirname(gz_path)

    def inner():
        path, ext = os.path.splitext(gz_path)

        # Validate with an explicit raise (an ``assert`` is stripped under
        # ``python -O``) and before creating anything on disk.
        if ext != ".gz":
            raise ValueError(f"Expected a '.gz' file, got: {gz_path}")

        # The parent directory name is reused as the sub-directory of the
        # target (presumably the dossier id; verify against the caller).
        dossier_id = os.path.basename(source_dir)
        target_dir = os.path.join(pdf_dir, dossier_id)
        provide_directory(target_dir)
        target_path = os.path.join(target_dir, os.path.basename(path))

        logging.debug(f"unzipping {gz_path} into {target_path}")

        with gzip.open(gz_path, "rb") as f_in, open(target_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

        logging.debug(f"unzipped {gz_path} into {target_path}")

        return target_path

    try:
        return inner()
    finally:
        # The compressed input is always discarded, even on failure.
        shutil.rmtree(source_dir)
|
|
|
|
|
|
def download(storage_client, object_name, target_root_dir):
    """Download one object through ``storage_client`` and log the result.

    Args:
        storage_client: Object exposing
            ``download_file(object_name, target_root_dir=...)``.
        object_name: Name of the object to download.
        target_root_dir: Directory passed to the client as download root.

    Returns:
        Whatever ``storage_client.download_file`` returns; it is logged as
        the path of the downloaded file.
    """
    local_path = storage_client.download_file(object_name, target_root_dir=target_root_dir)
    logging.debug(f"Downloaded {object_name} into {local_path}.")
    return local_path
|
|
|
|
|
|
def download_pdf_from_storage_via_request_payload(storage_client, payload: dict, pdf_dir: str):
    """Download and decompress the PDF referenced by a request payload.

    The compressed object is downloaded into a temporary directory, which is
    removed when the function exits, and is then unzipped below ``pdf_dir``.

    Args:
        storage_client: Client handed on to ``download``.
        payload: Mapping that must contain the keys "dossierId" and "fileId".
        pdf_dir: Directory below which the decompressed file is stored; it is
            created if missing.

    Returns:
        The path returned by ``unzip`` for the decompressed file.

    Raises:
        KeyError: If "dossierId" or "fileId" is missing from ``payload``.
    """
    provide_directory(pdf_dir)

    with tempfile.TemporaryDirectory() as scratch_dir:
        dossier_id = payload["dossierId"]
        file_id = payload["fileId"]
        object_name = dossier_id_and_file_id_to_compressed_storage_pdf_object_name(dossier_id, file_id)
        compressed_path = download(storage_client, object_name, scratch_dir)
        return unzip(compressed_path, pdf_dir)
|
|
|
|
|
|
def get_file_paths(directory):
    """List every entry of ``directory`` with ``directory`` prepended.

    Args:
        directory: Directory whose entries are listed (not recursive).

    Returns:
        A list with one joined path per entry reported by ``os.listdir``,
        in the order ``os.listdir`` returned them.
    """
    paths = []
    for entry in os.listdir(directory):
        paths.append(os.path.join(directory, entry))
    return paths
|