Merge in RR/cv-analysis from new_pyinfra to master
Squashed commit of the following:
commit f7a01a90aad1c402ac537de5bdf15df628ad54df
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jul 27 10:40:59 2022 +0200
fix typo
commit ff4d549fac5b612c2d391ae85823c5eca1e91916
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jul 27 10:34:04 2022 +0200
adjust build scripts for new pyinfra
commit ecd70f60d46406d8b6cc7f36a1533d706c917ca8
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jul 27 09:42:55 2022 +0200
simplify logging by using default configurations
commit 20193c14c940eed2b0a7a72058167e26064119d0
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jul 26 17:16:57 2022 +0200
tidy-up, refactor config logic to not depend on external files
commit d8069cd4d404a570bb04a04278161669d1c83332
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 15:14:59 2022 +0200
update pyinfra
commit c3bc11037cca9baf016043ab997c566f5b4a2586
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 15:09:14 2022 +0200
repair tests
commit 6f4e4f2863ee16ae056c1d432f663858c5f10221
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 14:52:38 2022 +0200
updated server logic to work with new pyinfra; update scripts for pyinfra as submodule
commit 2a18dba81de5ee84d0bdf0e77f478693e8d8aef4
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 14:10:41 2022 +0200
formatting
commit d87ce9328de9aa2341228af9b24473d5e583504e
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date: Tue Jul 26 14:10:11 2022 +0200
make server logic compatible with new pyinfra
69 lines
2.0 KiB
Python
69 lines
2.0 KiB
Python
import argparse
|
|
import gzip
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from tqdm import tqdm
|
|
|
|
from pyinfra.config import get_config
|
|
from pyinfra.storage.storage import get_s3_storage
|
|
|
|
# Configuration object from pyinfra, loaded once at import time; the script
# passes it to get_s3_storage() and reads the bucket name from it.
CONFIG = get_config()
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command line of the storage helper script.

    Two sub-commands are understood:

    * ``add <dossier_id> (--file PATH | --directory PATH)`` -- exactly one of
      ``--file`` / ``--directory`` must be given.
    * ``purge`` -- takes no further arguments.

    A sub-command is mandatory: invoking the script without one is reported
    as a usage error instead of being accepted and silently doing nothing.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is what the script uses.

    Returns:
        argparse.Namespace: has ``command`` and, for ``add``, ``dossier_id``,
        ``file`` and ``directory`` (the unused one of the latter two is
        ``None``).
    """
    parser = argparse.ArgumentParser()

    # dest="command" exposes the chosen sub-command as args.command;
    # required=True turns a missing sub-command into a usage error.
    subparsers = parser.add_subparsers(help="sub-command help", dest="command", required=True)

    parser_add = subparsers.add_parser("add", help="Add file(s) to the MinIO store")
    parser_add.add_argument("dossier_id")
    add_group = parser_add.add_mutually_exclusive_group(required=True)
    add_group.add_argument("--file", "-f")
    add_group.add_argument("--directory", "-d")

    subparsers.add_parser("purge", help="Delete all files and buckets in the MinIO store")

    return parser.parse_args(argv)
|
|
|
|
|
|
def combine_dossier_id_and_file_id_and_extension(dossier_id, file_id, extension):
    """Build the object key ``<dossier_id>/<file_id><extension>``."""
    file_name = f"{file_id}{extension}"
    return f"{dossier_id}/{file_name}"
|
|
|
|
|
|
def add_file_compressed(storage, bucket_name, dossier_id, path) -> None:
    """Gzip a local file and store it in ``bucket_name``.

    The object key is built from ``dossier_id``, the file's stem and a suffix
    chosen by the file extension:

    * ``.pdf``  -> ``.ORIGIN.pdf.gz``
    * ``.json`` -> ``.TEXT.json.gz``

    Args:
        storage: Object providing ``put_object(bucket_name, key, data)``.
        bucket_name: Name of the target bucket.
        dossier_id: Prefix under which the object is stored.
        path: Path (``str`` or ``Path``) of the local file to upload.

    Raises:
        ValueError: If the file extension is neither ``.pdf`` nor ``.json``.
            Previously this case failed with an ``UnboundLocalError``.
    """
    source = Path(path)

    if source.suffix == ".pdf":
        suffix_gz = ".ORIGIN.pdf.gz"
    elif source.suffix == ".json":
        suffix_gz = ".TEXT.json.gz"
    else:
        # Fail before reading or uploading anything for unsupported files.
        raise ValueError(f"Unsupported file type {source.suffix!r} for {source}; expected .pdf or .json")

    path_gz = combine_dossier_id_and_file_id_and_extension(dossier_id, source.stem, suffix_gz)

    # The whole file is read and compressed in memory before the upload.
    data = gzip.compress(source.read_bytes())
    storage.put_object(bucket_name, path_gz, data)
|
|
|
|
|
|
if __name__ == "__main__":

    # Parse the command line first so that ``--help`` and usage errors exit
    # before any storage call is made or a bucket is created.
    args = parse_args()

    storage = get_s3_storage(CONFIG)
    bucket_name = CONFIG.storage_bucket

    # Create the configured bucket if it does not exist yet.
    if not storage.has_bucket(bucket_name):
        storage.make_bucket(bucket_name)

    if args.command == "add":

        if args.file:
            add_file_compressed(storage, bucket_name, args.dossier_id, args.file)

        elif args.directory:
            directory = Path(args.directory)
            # os.listdir already returns a list, so tqdm can show the total
            # without an extra copy.
            for fname in tqdm(os.listdir(directory), desc="Adding files"):
                add_file_compressed(storage, bucket_name, args.dossier_id, directory / fname)

    elif args.command == "purge":
        storage.clear_bucket(bucket_name)
|