Merge in RR/pyinfra from 2.0.0-documentation to 2.0.0
Squashed commit of the following:
commit 7a794bdcc987631cdc4d89b5620359464e2e018e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Mon Jul 4 13:05:26 2022 +0200
removed obsolete imports
commit 3fc6a7ef5d0172dbce1c4292d245eced2f378b5a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Mon Jul 4 11:47:12 2022 +0200
enable docker-compose fixture
commit 36d8d3bc851b06d94cf12a73048a00a67ef79c42
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Mon Jul 4 11:46:53 2022 +0200
renaming
commit 3bf00d11cd041dff325b66f13fcd00d3ce96b8b5
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 30 12:47:57 2022 +0200
refactoring: added cached pipeline factory
commit 90e735852af2f86e35be845fabf28494de952edb
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Wed Jun 29 13:47:08 2022 +0200
renaming
commit 93b3d4b202b41183ed8cabe193a4bfa03f520787
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Wed Jun 29 13:25:03 2022 +0200
further refactored server setup code: moving and decomplecting
commit 8b2ed83c7ade5bd811cb045d56fbfb0353fa385e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Wed Jun 29 12:53:09 2022 +0200
refactored server setup code: factored out and decoupled operation registry and prometheus summary registry
commit da2dce762bdd6889165fbb320dc9ee8a0bd089b2
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Tue Jun 28 19:40:04 2022 +0200
adjusted test target
commit 70df7911b9b92f4b72afd7d4b33ca2bbf136295e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Tue Jun 28 19:32:38 2022 +0200
minor refactoring
commit 0937b63dc000346559bde353381304b273244109
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Mon Jun 27 13:59:59 2022 +0200
support for empty operation suffix
commit 5e56917970962a2e69bbd66a324bdb4618c040bd
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Mon Jun 27 12:52:36 2022 +0200
minor refactoring
commit 40665a7815ae5927b3877bda14fb77deef37d667
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Mon Jun 27 10:57:04 2022 +0200
optimization: prefix filtering via storage API for get_all_object_names
commit af0892a899d09023eb0e61eecb63e03dc2fd3b60
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Mon Jun 27 10:55:47 2022 +0200
topological sorting of definitions by caller hierarchy
200 lines
5.0 KiB
Python
200 lines
5.0 KiB
Python
import io
|
|
import logging
|
|
import socket
|
|
from collections import Counter
|
|
from multiprocessing import Process
|
|
from operator import itemgetter
|
|
from typing import Generator
|
|
|
|
import fitz
|
|
import pytest
|
|
import requests
|
|
from PIL import Image
|
|
from funcy import retry, project, omit
|
|
from waitress import serve
|
|
|
|
from pyinfra.server.dispatcher.dispatcher import Nothing
|
|
from pyinfra.server.server import (
|
|
set_up_processing_server,
|
|
)
|
|
from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic
|
|
from pyinfra.utils.func import starlift
|
|
from test.utils.image import image_to_bytes
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@pytest.fixture
def host():
    """Return the address the test server is bound to.

    The value is the IPv4 wildcard address; it is consumed by the ``port``,
    ``url`` and ``host_and_port`` fixtures.
    """
    bind_address = "0.0.0.0"
    return bind_address
|
|
|
|
|
|
def get_free_port(host):
    """Return a TCP port number that the OS reports as free on *host*.

    Binding to port ``0`` lets the operating system pick an unused port. The
    probing socket is closed before returning, so the port is released again
    and the caller can bind to it.

    NOTE: there is an inherent race between releasing the port here and the
    caller binding to it; another process could grab it in between.

    Args:
        host: Address to bind the probing socket to.

    Returns:
        The port number chosen by the operating system.
    """
    # The context manager guarantees the socket is closed deterministically
    # instead of relying on garbage collection.
    with socket.socket() as sock:
        sock.bind((host, 0))
        return sock.getsockname()[1]
|
|
|
|
|
|
@pytest.fixture
def port(host):
    """Return a port on ``host`` obtained from ``get_free_port``."""
    free_port = get_free_port(host)
    return free_port
|
|
|
|
|
|
@pytest.fixture
def url(host, port):
    """Return the HTTP base URL built from the ``host`` and ``port`` fixtures."""
    base_url = f"http://{host}:{port}"
    return base_url
|
|
|
|
|
|
@pytest.fixture
def server(server_stream_function, buffer_size, operation_name):
    """Build the processing server application under test.

    A single operation is registered: ``server_stream_function`` under the key
    ``operation_name``. ``buffer_size`` is forwarded unchanged to
    ``set_up_processing_server``.
    """
    operations = {operation_name: server_stream_function}
    return set_up_processing_server(operations, buffer_size)
|
|
|
|
|
|
@pytest.fixture
def operation_name(many_to_n):
    """Return the name under which the operation is registered on the server.

    ``"multi_inp_op"`` when ``many_to_n`` is truthy, otherwise the empty string.
    """
    if many_to_n:
        return "multi_inp_op"
    return ""
|
|
|
|
|
|
@pytest.fixture
def server_stream_function(operation_conditionally_batched, batched):
    """Wrap the operation for use by the server.

    Delegates to ``make_streamable_and_wrap_in_packing_logic``, passing the
    (possibly batch-lifted) operation together with the ``batched`` flag.
    """
    stream_function = make_streamable_and_wrap_in_packing_logic(
        operation_conditionally_batched,
        batched,
    )
    return stream_function
|
|
|
|
|
|
@pytest.fixture
def operation_conditionally_batched(operation, batched):
    """Return ``operation`` as is, or passed through ``starlift`` when ``batched``.

    NOTE(review): presumably ``starlift`` adapts a per-item function to work on
    batches of items; confirm against ``pyinfra.utils.func``.
    """
    if not batched:
        return operation
    return starlift(operation)
|
|
|
|
|
|
@pytest.fixture
def operation(core_operation, server_side_test):
    """Wrap ``core_operation`` into a generator function with normalised metadata.

    The returned ``op(data, metadata)`` calls ``core_operation`` and yields
    ``(data, metadata)`` pairs. For every pair the keys ``"pages"`` and
    ``"operation"`` are removed from the metadata, and unless
    ``server_side_test`` is truthy a running ``"id"`` per
    ``(dossierId, fileId)`` is added when the metadata has no ``"id"`` yet.

    If ``core_operation`` is ``Nothing``, ``Nothing`` is returned instead of a
    wrapper.
    """
    if core_operation is Nothing:
        return Nothing

    # Number of results seen so far per (dossierId, fileId) pair.
    seen_per_file = Counter()

    def auto_count(metadata):
        if server_side_test:
            return metadata

        key = itemgetter("dossierId", "fileId")(metadata)
        # The counter advances even when the metadata already carries an id.
        seen_per_file[key] += 1
        if "id" in metadata:
            return metadata
        return {**metadata, "id": seen_per_file[key]}

    def op(data, metadata):
        assert isinstance(metadata, dict)
        result = core_operation(data, metadata)
        # A core operation either returns a single pair or a generator of
        # pairs; treat both uniformly as an iterable of pairs.
        pairs = result if isinstance(result, Generator) else (result,)
        for out_data, out_metadata in pairs:
            yield out_data, auto_count(omit(out_metadata, ["pages", "operation"]))

    return op
|
|
|
|
|
|
@pytest.fixture(params=[False])
def server_side_test(request):
    """Flag read by the ``operation`` fixture; parametrized with ``False`` only.

    When truthy, ``operation`` leaves result metadata without automatic ids.
    """
    flag = request.param
    return flag
|
|
|
|
|
|
@pytest.fixture
def core_operation(item_type, one_to_many, analysis_task):
    """Select the raw operation for the current parameter combination.

    The operation is looked up by ``one_to_many``, then ``item_type``, then
    ``analysis_task``. Every operation takes ``(payload: bytes, metadata)`` and
    either returns one ``(bytes, metadata)`` pair or yields several.

    If no operation is defined for the combination, the message is logged and
    the requesting test is skipped. ``pytest.skip`` raises, so nothing is
    returned in that case.
    """

    def duplicate(string: bytes, metadata):
        # NOTE(review): not referenced by the dispatch table below, and it
        # yields a nested ``((bytes, metadata), metadata)`` tuple.
        for _ in range(2):
            yield upper(string, metadata), metadata

    def upper(string: bytes, metadata):
        return string.decode().upper().encode(), metadata

    def extract(string: bytes, metadata):
        # Keep only the characters whose index is listed in metadata["pages"].
        selected = project(dict(enumerate(string.decode())), metadata["pages"])
        for i, c in selected.items():
            # Yield a fresh dict per item so results do not alias each other
            # and the caller's metadata is not mutated.
            yield c.encode(), {**metadata, "id": i}

    def rotate(im: bytes, metadata):
        im = Image.open(io.BytesIO(im))
        return image_to_bytes(im.rotate(90)), metadata

    def classify(_: bytes, metadata):
        return b"", {"classification": 1, **metadata}

    def stream_pages(pdf: bytes, metadata):
        for i, _ in enumerate(fitz.open(stream=pdf)):
            # A placeholder payload is produced per page; rendering would be
            # ``page.get_pixmap().tobytes("png")``.
            # Fresh dict per page, see ``extract``.
            yield f"page_{i}".encode(), {**metadata, "id": i}

    # one_to_many -> item_type -> analysis_task -> operation
    params2op = {
        False: {
            "string": {False: upper},
            "image": {False: rotate, True: classify},
        },
        True: {
            "string": {False: extract},
            "pdf": {False: stream_pages},
        },
    }

    try:
        return params2op[one_to_many][item_type][analysis_task]
    except KeyError:
        msg = f"No operation defined for [{one_to_many=}, {item_type=}, {analysis_task=}]."
        # Log first: pytest.skip raises and nothing after it would run.
        logger.debug(msg)
        pytest.skip(msg)
|
|
|
|
|
|
@pytest.fixture(params=["pdf", "string", "image"])
def item_type(request):
    """Kind of payload under test: ``"pdf"``, ``"string"`` or ``"image"``.

    Used by ``core_operation`` as the second lookup key.
    """
    kind = request.param
    return kind
|
|
|
|
|
|
@pytest.fixture(params=[True, False])
def one_to_many(request):
    """Boolean parameter used by ``core_operation`` as the first lookup key."""
    flag = request.param
    return flag
|
|
|
|
|
|
@pytest.fixture(params=[True, False])
def many_to_n(request):
    """Boolean parameter that determines the value of ``operation_name``."""
    flag = request.param
    return flag
|
|
|
|
|
|
@pytest.fixture(params=[True, False])
def analysis_task(request):
    """Boolean parameter used by ``core_operation`` as the third lookup key."""
    flag = request.param
    return flag
|
|
|
|
|
|
@pytest.fixture(params=[False, True])
def batched(request):
    """Select whether the webserver's buffer processor function is applied to
    batches (``True``) or to single items (``False``)."""
    flag = request.param
    return flag
|
|
|
|
|
|
@pytest.fixture
def host_and_port(host, port):
    """Bundle ``host`` and ``port`` as a dict with the keys ``"host"`` and ``"port"``.

    The ``server_process`` fixture unpacks this dict into the keyword
    arguments passed to ``serve``.
    """
    return dict(host=host, port=port)
|
|
|
|
|
|
@retry(tries=5, timeout=1)
def server_ready(url):
    """Check whether the server at *url* answers its ``/ready`` endpoint.

    The ``retry`` decorator repeats the call when it raises, up to 5 attempts
    with a pause of 1 between attempts.

    Args:
        url: Base URL of the server, without a trailing slash.

    Returns:
        ``True`` if the response status is exactly 200, ``False`` for any other
        non-error status.

    Raises:
        requests.RequestException: If the last attempt fails to connect, times
            out or gets an HTTP error status.
    """
    # Without a timeout a server that accepts the connection but never answers
    # would block forever and the retry logic would never get a chance to run.
    response = requests.get(f"{url}/ready", timeout=5)
    response.raise_for_status()
    return response.status_code == 200
|
|
|
|
|
|
@pytest.fixture(autouse=False, scope="function")
def server_process(server, host_and_port, url):
    """Run the server application in a separate process for the duration of a test.

    ``serve`` is started in a child process with ``server`` as ``app`` and the
    entries of ``host_and_port`` as further keyword arguments. The test body
    runs once ``server_ready(url)`` reports success. Afterwards the process is
    killed, joined and closed.

    The teardown also runs when the readiness check raises, so a server that
    never becomes ready does not leave an orphaned process behind.
    """
    # A distinct local name keeps the ``server`` fixture argument unshadowed.
    process = Process(target=serve, kwargs={"app": server, **host_and_port})
    process.start()

    try:
        if server_ready(url):
            yield
    finally:
        process.kill()
        # join() must precede close(): closing a running process raises.
        process.join()
        process.close()
|