pyinfra/scripts/mock_client.py
Julius Unverfehrt 11c9f8a073 Pull request #25: Fixes
Merge in RR/pyinfra from fixes to master

Squashed commit of the following:

commit e3eff12ccdea52e041cc7a14cda72d3e32aa2144
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Mar 22 15:42:35 2022 +0100

    black

commit 2bc520c849ea4e833cb60b2c97626da6636d3155
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Mar 22 15:42:08 2022 +0100

    adjust mock script

commit 429b6b8f3a3fc8aa35515395712057d1c7bec13e
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Mar 22 15:41:42 2022 +0100

    change scope for retry consume

commit 7488394e313270fe7ba356c40d810e7cb3c706ee
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Mar 22 15:39:39 2022 +0100

    add heartbeat to AMQP connection

commit 004c5fa805bfb982f55de533bc109fa21bacfbc8
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Mar 22 15:38:15 2022 +0100

    Adjust error handling for missing prometheus endpoint: error is logged not raised
2022-03-22 15:48:27 +01:00

84 lines
2.8 KiB
Python

import argparse
import json
import pika
from pyinfra.config import CONFIG
from pyinfra.storage.storages import get_s3_storage
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--analysis_container", "-a", choices=["detr", "ner", "image"], required=True)
args = parser.parse_args()
return args
def read_connection_params():
credentials = pika.PlainCredentials(CONFIG.rabbitmq.user, CONFIG.rabbitmq.password)
parameters = pika.ConnectionParameters(
host=CONFIG.rabbitmq.host,
port=CONFIG.rabbitmq.port,
heartbeat=CONFIG.rabbitmq.heartbeat,
credentials=credentials,
)
return parameters
def make_channel(connection) -> pika.adapters.blocking_connection.BlockingChannel:
channel = connection.channel()
channel.basic_qos(prefetch_count=1)
return channel
def declare_queue(channel, queue: str):
args = {"x-dead-letter-exchange": "", "x-dead-letter-routing-key": CONFIG.rabbitmq.queues.dead_letter}
return channel.queue_declare(queue=queue, auto_delete=False, durable=True, arguments=args)
def make_connection() -> pika.BlockingConnection:
parameters = read_connection_params()
connection = pika.BlockingConnection(parameters)
return connection
def build_message_bodies(analyse_container_type):
def update_message(message_dict):
if analyse_container_type == "detr" or analyse_container_type == "image":
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz"})
if analyse_container_type == "ner":
message_dict.update(
{"targetFileExtension": "TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz"}
)
return message_dict
storage = get_s3_storage()
for bucket_name, pdf_name in storage.get_all_object_names(CONFIG.storage.bucket):
if "pdf" not in pdf_name:
continue
file_id = pdf_name.split(".")[0]
dossier_id, file_id = file_id.split("/")
message_dict = {"dossierId": dossier_id, "fileId": file_id}
update_message(message_dict)
yield json.dumps(message_dict).encode()
def main(args):
connection = make_connection()
channel = make_channel(connection)
declare_queue(channel, CONFIG.rabbitmq.queues.input)
declare_queue(channel, CONFIG.rabbitmq.queues.output)
for body in build_message_bodies(args.analysis_container):
channel.basic_publish("", CONFIG.rabbitmq.queues.input, body)
print(f"Put {body} on {CONFIG.rabbitmq.queues.input}")
for method_frame, _, body in channel.consume(queue=CONFIG.rabbitmq.queues.output):
print(f"Received {json.loads(body)}")
channel.basic_ack(method_frame.delivery_tag)
channel.close()
if __name__ == "__main__":
main(parse_args())