Julius Unverfehrt a1bfec765c Pull request #43: Image prediction v2 support
Merge in RR/pyinfra from image-prediction-v2-support to 2.0.0

Squashed commit of the following:

commit 37c536324e847357e86dd9b72d1e07ad792ed90f
Merge: 77d1db8 01bfb1d
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 13:53:56 2022 +0200

    Merge branch '2.0.0' of ssh://git.iqser.com:2222/rr/pyinfra into image-prediction-v2-support

commit 77d1db8e8630de8822c124eb39f4cd817ed1d3e1
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 13:07:41 2022 +0200

    add operation assignment via config if operation is not defined by caller

commit 36c8ca48a8c6151f713c093a23de110901ba6b02
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 10:33:34 2022 +0200

    refactor nothing part 2

commit f6cd0ef986802554dd544b9b7a24073d3b3f05b5
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 10:28:49 2022 +0200

    refactor nothing

commit 1e70d49531e89613c70903be49290b94ee014f65
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 17:42:12 2022 +0200

    enable docker-compose fixture

commit 9fee32cecdd120cfac3e065fb8ad2b4f37b49226
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 17:40:35 2022 +0200

    added 'multi' key to actual operation configurations

commit 4287f6d9878dd361489b8490eafd06f81df472ce
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 16:56:12 2022 +0200

    removed debug prints

commit 23a533e8f99222c7e598fb0864f65e9aa3508a3b
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 16:31:50 2022 +0200

    completed correcting / cleaning upload and download logic with regard to operations and ids. next: remove debug code

commit 33246d1ff94989d2ea70242c7ae2e58afa4d35c1
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 14:37:17 2022 +0200

    corrected / cleaned upload and download logic with regard to operations and ids

commit 7f2b4e882022c6843cb2f80df202caa495c54ee9
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jul 5 18:41:07 2022 +0200

    partially decomplected file descriptor manager from concrete and non-generic descriptor code

commit 40b892da17670dae3b8eba1700877c1dcf219852
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jul 5 09:53:46 2022 +0200

    typo

commit ec4fa8e6f4551ff1f8d4f78c484b7a260f274898
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jul 5 09:52:41 2022 +0200

    typo

commit 701b43403c328161fd96a73ce388a66035cca348
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 17:26:53 2022 +0200

    made adjustments for image classification with pyinfra 2.x; added related fixmes

commit 7a794bdcc987631cdc4d89b5620359464e2e018e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 13:05:26 2022 +0200

    removed obsolete imports

commit 3fc6a7ef5d0172dbce1c4292d245eced2f378b5a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 11:47:12 2022 +0200

    enable docker-compose fixture

commit 36d8d3bc851b06d94cf12a73048a00a67ef79c42
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 11:46:53 2022 +0200

    renaming

commit 3bf00d11cd041dff325b66f13fcd00d3ce96b8b5
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 30 12:47:57 2022 +0200

    refactoring: added cached pipeline factory

commit 90e735852af2f86e35be845fabf28494de952edb
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 13:47:08 2022 +0200

    renaming

commit 93b3d4b202b41183ed8cabe193a4bfa03f520787
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 13:25:03 2022 +0200

    further refactored server setup code: moving and decomplecting

commit 8b2ed83c7ade5bd811cb045d56fbfb0353fa385e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 12:53:09 2022 +0200

    refactored server setup code: factored out and decoupled operation registry and prometheus summary registry

... and 6 more commits
2022-07-11 14:17:59 +02:00

166 lines
4.2 KiB
Python

from functools import partial
from itertools import starmap, repeat
import numpy as np
import pytest
from PIL import Image
from funcy import lmap, compose, flatten, lflatten, omit, second, first, lzip, merge
from pyinfra.server.normalization import normalize_item
from pyinfra.server.nothing import Nothing
from pyinfra.server.packing import pack, unpack
from pyinfra.utils.func import star, lift, lstarlift
from test.utils.image import image_to_bytes
from test.utils.pdf import pdf_stream
@pytest.fixture
def input_data_items(unencoded_input_data, input_data_encoder):
    """Return the raw input data after passing it through the selected encoder."""
    encode = input_data_encoder
    return encode(unencoded_input_data)
@pytest.fixture
def unencoded_input_data(item_type, unencoded_strings, unencoded_images, unencoded_pdfs):
    """Select the raw (not yet encoded) input items that match ``item_type``.

    Raises:
        ValueError: if ``item_type`` is none of "string", "image" or "pdf".
    """
    candidates = (
        ("string", unencoded_strings),
        ("image", unencoded_images),
        ("pdf", unencoded_pdfs),
    )
    for known_type, items in candidates:
        if item_type == known_type:
            return items
    raise ValueError(f"Unknown item type {item_type}")
@pytest.fixture
def input_data_encoder(item_type):
    """Select the function that turns raw items of ``item_type`` into bytes.

    Raises:
        ValueError: if ``item_type`` is none of "string", "image" or "pdf".
    """
    encoders = (
        ("string", strings_to_bytes),
        ("image", images_to_bytes),
        ("pdf", pdfs_to_bytes),
    )
    for known_type, encoder in encoders:
        if item_type == known_type:
            return encoder
    raise ValueError(f"Unknown item type {item_type}")
@pytest.fixture
def unencoded_pdfs(n_items, unencoded_pdf):
    """Return a list holding the same ``unencoded_pdf`` object ``n_items`` times."""
    return [unencoded_pdf for _ in range(n_items)]
def pdfs_to_bytes(unencoded_pdfs):
    """Apply ``pdf_stream`` to every PDF and collect the results in a list."""
    return lmap(pdf_stream, unencoded_pdfs)
@pytest.fixture
def target_data_items(input_data_items, core_operation, metadata):
    """Expected data items produced by ``core_operation``.

    Each (data, metadata) pair is passed to ``core_operation``, the result is
    run through ``normalize_item``, and all results are flattened into one list.
    Returns ``Nothing`` when there is no core operation.
    """
    if core_operation is Nothing:
        return Nothing

    normalized_operation = compose(normalize_item, core_operation)
    pairs = zip(input_data_items, metadata)
    # Generator keeps evaluation lazy, so results are flattened as they are produced.
    return lflatten(normalized_operation(data, item_metadata) for data, item_metadata in pairs)
@pytest.fixture
def unencoded_strings(n_items):
    """Return the strings "content0", "content1", ... up to ``n_items`` entries."""
    indices = range(n_items)
    return [f"content{index}" for index in indices]
def strings_to_bytes(strings):
    """Encode every string as UTF-8 and return the resulting bytes objects as a list."""
    to_utf8 = partial(bytes, encoding="utf8")
    return list(map(to_utf8, strings))
@pytest.fixture
def targets(data_message_pairs, input_data_items, operation, metadata, server_side_test, queue_message_metadata):
    """Expected (data, metadata) response pairs for ``operation``.

    TODO: this has become super wonky

    The second element of each data/message pair is merged with the storage
    metadata (storage metadata wins on key clashes). ``operation`` is applied to
    every (data, merged metadata) pair, normalized, packed and unpacked again.
    The keys of the first queue message metadata dict -- plus "id" when this is
    not a server side test -- are then removed from the response metadata.

    Returns ``Nothing`` when there is no operation, and an empty list when a
    ``ValueError`` is raised while building the expectation (for instance when
    there are no results to unpack into data and metadata).
    """
    message_metadata = lmap(second, data_message_pairs)
    metadata = [{**from_message, **from_storage} for from_message, from_storage in zip(message_metadata, metadata)]

    if operation is Nothing:
        return Nothing

    packing_operation = compose(lift(star(pack)), normalize_item, operation)

    try:
        packed_results = flatten(starmap(packing_operation, zip(input_data_items, metadata)))
        response_data, response_metadata = zip(*map(unpack, packed_results))

        # "id" is only stripped for tests that are not server side.
        id_keys = [] if server_side_test else ["id"]
        queue_message_keys = id_keys + list(first(queue_message_metadata).keys())

        stripped_metadata = [omit(item_metadata, queue_message_keys) for item_metadata in response_metadata]
        return lzip(response_data, stripped_metadata)
    except ValueError:
        return []
@pytest.fixture
def endpoint(url, operation_name):
    """Return ``url`` and ``operation_name`` joined by a single slash."""
    return f"{url}/{operation_name}"
@pytest.fixture(params=("rest", "basic"))
def client_pipeline_type(request):
    """Parametrized fixture yielding the client pipeline types "rest" and "basic"."""
    return request.param
@pytest.fixture(params=(1, 0, 5))
def n_items(request):
    """Parametrized fixture yielding the item counts 1, 0 and 5."""
    return request.param
@pytest.fixture(params=(0, 100))
def n_pages(request):
    """Parametrized fixture yielding the page counts 0 and 100."""
    return request.param
@pytest.fixture(params=(1, 5))
def buffer_size(request):
    """Parametrized fixture yielding the buffer sizes 1 and 5."""
    return request.param
def array_to_image(array) -> Image.Image:
    """Scale ``array`` by 255, cast it to uint8 and build an RGB PIL image from it.

    NOTE(review): the arrays passed in this file are slices of ``input_batch``
    with shape (3, 30, 30); confirm that this axis layout is what
    ``Image.fromarray`` should receive for mode "RGB".
    """
    pixel_values = np.uint8(array * 255)
    return Image.fromarray(pixel_values, mode="RGB")
def input_batch(n_items):
    """Return a random float array of shape (n_items, 3, 30, 30) with values in [0, 1)."""
    batch_shape = (n_items, 3, 30, 30)
    return np.random.random_sample(size=batch_shape)
@pytest.fixture
def unencoded_images(n_items):
    """Return ``n_items`` PIL images built from a freshly sampled random batch."""
    batch = input_batch(n_items)
    return [array_to_image(array) for array in batch]
def images_to_bytes(images):
    """Apply ``image_to_bytes`` to every image and collect the results in a list."""
    return [image_to_bytes(image) for image in images]
@pytest.fixture
def metadata(n_items, many_to_n):
    """Storage metadata: one ``{"key": "value"}`` dict per input item.

    TODO: rename

    Every item gets its own dict object. Repeating a single dict would make
    all entries alias the same object, so a consumer mutating one item's
    metadata would silently change the metadata of every other item as well.

    ``many_to_n`` is requested but not read here; it is kept so that the
    fixture's dependencies stay unchanged.
    """
    return [{"key": "value"} for _ in range(n_items)]
@pytest.fixture
def queue_message_metadata(n_items, operation_name):
    """Build one queue message metadata dict per item.

    Every dict carries "dossierId" and an indexed "fileId". "operation" is
    added only when ``operation_name`` is truthy, and "pages" only when there
    is more than one item.
    """

    def message_metadata(index):
        fields = {
            "dossierId": "dossier_id",
            "fileId": f"file_id_{index}",
        }
        if operation_name:
            fields["operation"] = operation_name
        if n_items > 1:
            fields["pages"] = [0, 2, 3]
        return fields

    return [message_metadata(index) for index in range(n_items)]
@pytest.fixture
def packages(input_data_items, metadata):
    """Pack the (data, metadata) pairs with ``pack`` lifted by ``lstarlift``."""
    pack_pairs = lstarlift(pack)
    data_metadata_pairs = zip(input_data_items, metadata)
    return pack_pairs(data_metadata_pairs)