modified core operations to return metadata for better classification mock test

This commit is contained in:
Matthias Bisping 2022-06-03 13:40:59 +02:00
parent eb81e96400
commit 9232385dea
2 changed files with 12 additions and 10 deletions

View File

@ -1,5 +1,6 @@
import io import io
import logging import logging
import random
import socket import socket
from itertools import repeat from itertools import repeat
from multiprocessing import Process from multiprocessing import Process
@ -65,11 +66,12 @@ def operation(core_operation):
def op(data, metadata): def op(data, metadata):
assert isinstance(metadata, dict) assert isinstance(metadata, dict)
result = core_operation(data, metadata) result = core_operation(data, metadata)
metadata = omit(metadata, ["pages", "operation"])
if isinstance(result, Generator): if isinstance(result, Generator):
return zip(result, repeat(metadata)) for data, metadata in result:
return data, omit(metadata, ["pages", "operation"])
else: else:
return result, metadata data, metadata = result
return data, omit(metadata, ["pages", "operation"])
if core_operation is Nothing: if core_operation is Nothing:
return Nothing return Nothing
@ -81,26 +83,26 @@ def core_operation(item_type, one_to_many, analysis_task):
def duplicate(string: bytes, metadata): def duplicate(string: bytes, metadata):
for _ in range(2): for _ in range(2):
yield upper(string, metadata) yield upper(string, metadata), metadata
def upper(string: bytes, metadata): def upper(string: bytes, metadata):
return string.decode().upper().encode() return string.decode().upper().encode()
def extract(string: bytes, metadata): def extract(string: bytes, metadata):
for c in project(dict(enumerate(string.decode())), metadata["pages"]).values(): for c in project(dict(enumerate(string.decode())), metadata["pages"]).values():
yield c.encode() yield c.encode(), metadata
def rotate(im: bytes, metadata): def rotate(im: bytes, metadata):
im = Image.open(io.BytesIO(im)) im = Image.open(io.BytesIO(im))
return image_to_bytes(im.rotate(90)) return image_to_bytes(im.rotate(90))
def classify(_: bytes, metadata): def classify(_: bytes, metadata):
return b"" return b"", {"classification": 1}
def stream_pages(pdf: bytes, metadata): def stream_pages(pdf: bytes, metadata):
for i, page in enumerate(fitz.open(stream=pdf)): for i, page in enumerate(fitz.open(stream=pdf)):
# yield page.get_pixmap().tobytes("png"), metadata # yield page.get_pixmap().tobytes("png"), metadata
yield f"page_{i}".encode() yield f"page_{i}".encode(), metadata
params2op = { params2op = {
False: { False: {

View File

@ -29,7 +29,7 @@ from test.utils.input import pair_data_with_queue_message
@pytest.mark.parametrize( @pytest.mark.parametrize(
"analysis_task", "analysis_task",
[ [
False, # False,
True, True,
], ],
) )
@ -39,9 +39,9 @@ from test.utils.input import pair_data_with_queue_message
@pytest.mark.parametrize( @pytest.mark.parametrize(
"item_type", "item_type",
[ [
"string", # "string",
"image", "image",
"pdf", # "pdf",
], ],
) )
@pytest.mark.parametrize( @pytest.mark.parametrize(