2022-06-03 13:48:33 +02:00

156 lines
4.1 KiB
Python

import gzip
import json
from itertools import starmap, repeat, chain
from operator import itemgetter
import pytest
from funcy import compose, lpluck
from pyinfra.default_objects import (
get_callback,
get_response_strategy,
get_consumer,
get_queue_manager,
get_storage,
)
from pyinfra.queue.consumer import Consumer
from pyinfra.server.packing import unpack, pack
from pyinfra.visitor import get_object_descriptor, QueueVisitor
from test.utils.input import pair_data_with_queue_message
@pytest.mark.parametrize("one_to_many", [False, True])
@pytest.mark.parametrize("analysis_task", [False, True])
@pytest.mark.parametrize("n_items", [1, 3])
@pytest.mark.parametrize("n_pages", [2])
@pytest.mark.parametrize("buffer_size", [2])
@pytest.mark.parametrize("item_type", ["string", "image", "pdf"])
@pytest.mark.parametrize(
    "queue_manager_name",
    [
        # "mock",
        "pika",
    ],
    scope="session",
)
@pytest.mark.parametrize(
    "client_name",
    [
        # "mock",
        "s3",
        "azure",
    ],
    scope="session",
)
@pytest.mark.parametrize(
    "components_type",
    [
        # "test",
        "real",
    ],
)
def test_serving(server_process, bucket_name, components, targets, data_message_pairs, n_items):
    """Round-trip test: upload inputs, let the consumer process them, compare results.

    Every (data, message) pair is stored and its message published as a
    request. The consumer is then asked to handle ``n_items`` requests, and
    the items it wrote to storage are compared with ``targets``. Both sides
    are sorted by their first element, so ordering does not affect the result.

    Parametrized names that do not appear in the signature are presumably
    consumed by fixtures defined outside this file -- TODO confirm.
    """
    store, queue, worker = components

    upload_data_to_storage_and_publish_requests_to_queue(store, queue, data_message_pairs)
    worker.consume_and_publish(n=n_items)

    outputs = get_data_uploaded_by_consumer(queue, store, bucket_name)
    expected = sorted(targets, key=itemgetter(0))

    assert outputs == expected
@pytest.fixture
def data_message_pairs(input_data_items, metadata):
    """Pair each serialized (data, metadata) package with a queue message.

    Each input item is packed together with its metadata, dumped to JSON and
    encoded to bytes. The resulting lazy stream is handed to
    ``pair_data_with_queue_message``, whose return value is the fixture value.
    """

    def serialize(data, meta):
        # pack -> JSON text -> bytes, applied to one (data, metadata) pair.
        return json.dumps(pack(data, meta)).encode()

    serialized_packages = starmap(serialize, zip(input_data_items, metadata))
    return pair_data_with_queue_message(serialized_packages)
def upload_data_to_storage_and_publish_requests_to_queue(storage, queue_manager, data_message_pairs):
    """Upload and publish every (data, message) pair, one after another.

    Args:
        storage: Storage object the data is written to.
        queue_manager: Queue manager the request messages are published on.
        data_message_pairs: Iterable of ``(data, message)`` pairs.
    """
    for payload, request in data_message_pairs:
        upload_data_to_storage_and_publish_request_to_queue(
            storage,
            queue_manager,
            data=payload,
            message=request,
        )
def upload_data_to_storage_and_publish_request_to_queue(storage, queue_manager, data, message):
    """Store one gzip-compressed payload, then publish its request message.

    Args:
        storage: Storage object; ``put_object`` is called on it.
        queue_manager: Queue manager; ``publish_request`` is called on it.
        data: Raw bytes to compress and upload.
        message: Request message; also used to derive the object descriptor
            that is expanded into ``put_object`` keyword arguments.
    """
    object_descriptor = get_object_descriptor(message)
    compressed_payload = gzip.compress(data)

    # The object must be in storage before the request that refers to it is queued.
    storage.put_object(**object_descriptor, data=compressed_payload)
    queue_manager.publish_request(message)
def get_data_uploaded_by_consumer(queue_manager, storage, bucket_name):
    """Collect and decode everything referenced by the output queue.

    The ``responseFile`` entry of each message on the output queue names an
    object in ``bucket_name``. Every such object is fetched and decoded, and
    the decoded items are returned as one flat list sorted by first element.
    """
    response_messages = queue_manager.output_queue.to_list()
    response_file_names = lpluck("responseFile", response_messages)

    # Fetch all objects first, then decode, mirroring the original evaluation order.
    fetched_objects = [storage.get_object(bucket_name, file_name) for file_name in response_file_names]

    decoded_items = [item for fetched in fetched_objects for item in decode(fetched)]
    decoded_items.sort(key=itemgetter(0))
    return decoded_items
@pytest.fixture
def components(components_type, real_components, test_components, bucket_name):
    """Yield a clean ``(storage, queue_manager, consumer)`` triple.

    Selects the triple matching ``components_type`` (``"real"`` or
    ``"test"``). Both candidate fixtures are requested as arguments, so both
    are set up regardless of which is selected. The queue and the bucket are
    cleared before the test, and the bucket is cleared again afterwards.

    Raises:
        ValueError: If ``components_type`` is neither ``"real"`` nor ``"test"``.
    """
    if components_type not in ("real", "test"):
        raise ValueError(f"Unknown components type '{components_type}'.")

    selected = real_components if components_type == "real" else test_components
    storage, queue_manager, consumer = selected

    # Start from an empty queue and bucket so earlier runs cannot leak in.
    queue_manager.clear()
    storage.clear_bucket(bucket_name)

    yield storage, queue_manager, consumer

    # Teardown: remove whatever the test left in the bucket.
    storage.clear_bucket(bucket_name)
def decode(storage_item):
    """Yield the unpacked package(s) contained in one stored object.

    ``storage_item`` is gzip-compressed, encoded JSON. The decoded document
    is either a single package or a list of packages; a single package is
    treated as a one-element list. Each package is passed through ``unpack``.
    """
    document = json.loads(gzip.decompress(storage_item).decode())
    packages = document if isinstance(document, list) else [document]
    for package in packages:
        yield unpack(package)
@pytest.fixture(params=["real", "test"])
def components_type(request):
    """Default parametrization of the component set used by ``components``.

    The values must match the branches handled by the ``components`` fixture
    (``"real"`` and ``"test"``); any other value makes that fixture raise
    ``ValueError``. The previous default ``"mixed"`` was not handled there.

    Tests may still override this fixture with
    ``@pytest.mark.parametrize("components_type", ...)``.
    """
    return request.param
@pytest.fixture
def real_components(url):
    """Build ``(storage, queue_manager, consumer)`` from the default-object factories.

    The consumer is created from a callback bound to ``url``; the queue
    manager and storage come from their argument-less default factories.
    """
    consumer = get_consumer(get_callback(url))
    queue_manager = get_queue_manager()
    storage = get_storage()
    return storage, queue_manager, consumer
@pytest.fixture
def test_components(url, queue_manager, storage):
    """Build ``(storage, queue_manager, consumer)`` around injected fixtures.

    Unlike the default-object variant, the consumer is wired by hand: a
    ``QueueVisitor`` over the given ``storage`` (with a callback bound to
    ``url`` and a response strategy for that storage) is wrapped in a
    ``Consumer`` that uses the given ``queue_manager``.
    """
    callback = get_callback(url)
    response_strategy = get_response_strategy(storage)
    visitor = QueueVisitor(storage, callback, response_strategy)
    return storage, queue_manager, Consumer(visitor, queue_manager)