Compare commits

...

266 Commits

Author SHA1 Message Date
Julius Unverfehrt
a1bfec765c Pull request #43: Image prediction v2 support
Merge in RR/pyinfra from image-prediction-v2-support to 2.0.0

Squashed commit of the following:

commit 37c536324e847357e86dd9b72d1e07ad792ed90f
Merge: 77d1db8 01bfb1d
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 13:53:56 2022 +0200

    Merge branch '2.0.0' of ssh://git.iqser.com:2222/rr/pyinfra into image-prediction-v2-support

commit 77d1db8e8630de8822c124eb39f4cd817ed1d3e1
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 13:07:41 2022 +0200

    add operation assignment via config if operation is not defined by caller

commit 36c8ca48a8c6151f713c093a23de110901ba6b02
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 10:33:34 2022 +0200

    refactor nothing part 2

commit f6cd0ef986802554dd544b9b7a24073d3b3f05b5
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Mon Jul 11 10:28:49 2022 +0200

    refactor nothing

commit 1e70d49531e89613c70903be49290b94ee014f65
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 17:42:12 2022 +0200

    enable docker-compose fixture

commit 9fee32cecdd120cfac3e065fb8ad2b4f37b49226
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 17:40:35 2022 +0200

    added 'multi' key to actual operation configurations

commit 4287f6d9878dd361489b8490eafd06f81df472ce
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 16:56:12 2022 +0200

    removed debug prints

commit 23a533e8f99222c7e598fb0864f65e9aa3508a3b
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 16:31:50 2022 +0200

    completed correcting / cleaning upload and download logic with regard to operations and ids. next: remove debug code

commit 33246d1ff94989d2ea70242c7ae2e58afa4d35c1
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jul 6 14:37:17 2022 +0200

    corrected / cleaned upload and download logic with regard to operations and ids

commit 7f2b4e882022c6843cb2f80df202caa495c54ee9
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jul 5 18:41:07 2022 +0200

    partially decomplected file descriptor manager from concrete and non-generic descriptor code

commit 40b892da17670dae3b8eba1700877c1dcf219852
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jul 5 09:53:46 2022 +0200

    typo

commit ec4fa8e6f4551ff1f8d4f78c484b7a260f274898
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jul 5 09:52:41 2022 +0200

    typo

commit 701b43403c328161fd96a73ce388a66035cca348
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 17:26:53 2022 +0200

    made adjustments for image classification with pyinfra 2.x; added related fixmes

commit 7a794bdcc987631cdc4d89b5620359464e2e018e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 13:05:26 2022 +0200

    removed obsolete imports

commit 3fc6a7ef5d0172dbce1c4292d245eced2f378b5a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 11:47:12 2022 +0200

    enable docker-compose fixture

commit 36d8d3bc851b06d94cf12a73048a00a67ef79c42
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 11:46:53 2022 +0200

    renaming

commit 3bf00d11cd041dff325b66f13fcd00d3ce96b8b5
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 30 12:47:57 2022 +0200

    refactoring: added cached pipeline factory

commit 90e735852af2f86e35be845fabf28494de952edb
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 13:47:08 2022 +0200

    renaming

commit 93b3d4b202b41183ed8cabe193a4bfa03f520787
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 13:25:03 2022 +0200

    further refactored server setup code: moving and decomplecting

commit 8b2ed83c7ade5bd811cb045d56fbfb0353fa385e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 12:53:09 2022 +0200

    refactored server setup code: factored out and decoupled operation registry and prometheus summary registry

... and 6 more commits
2022-07-11 14:17:59 +02:00
Julius Unverfehrt
01bfb1d668 Pull request #40: 2.0.0 documentation
Merge in RR/pyinfra from 2.0.0-documentation to 2.0.0

Squashed commit of the following:

commit 7a794bdcc987631cdc4d89b5620359464e2e018e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 13:05:26 2022 +0200

    removed obsolete imports

commit 3fc6a7ef5d0172dbce1c4292d245eced2f378b5a
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 11:47:12 2022 +0200

    enable docker-compose fixture

commit 36d8d3bc851b06d94cf12a73048a00a67ef79c42
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jul 4 11:46:53 2022 +0200

    renaming

commit 3bf00d11cd041dff325b66f13fcd00d3ce96b8b5
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 30 12:47:57 2022 +0200

    refactoring: added cached pipeline factory

commit 90e735852af2f86e35be845fabf28494de952edb
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 13:47:08 2022 +0200

    renaming

commit 93b3d4b202b41183ed8cabe193a4bfa03f520787
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 13:25:03 2022 +0200

    further refactored server setup code: moving and decomplecting

commit 8b2ed83c7ade5bd811cb045d56fbfb0353fa385e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 29 12:53:09 2022 +0200

    refactored server setup code: factored out and decoupled operation registry and prometheus summary registry

commit da2dce762bdd6889165fbb320dc9ee8a0bd089b2
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jun 28 19:40:04 2022 +0200

    adjusted test target

commit 70df7911b9b92f4b72afd7d4b33ca2bbf136295e
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Tue Jun 28 19:32:38 2022 +0200

    minor refactoring

commit 0937b63dc000346559bde353381304b273244109
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jun 27 13:59:59 2022 +0200

    support for empty operation suffix

commit 5e56917970962a2e69bbd66a324bdb4618c040bd
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jun 27 12:52:36 2022 +0200

    minor refactoring

commit 40665a7815ae5927b3877bda14fb77deef37d667
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jun 27 10:57:04 2022 +0200

    optimization: prefix filtering via storage API for get_all_object_names

commit af0892a899d09023eb0e61eecb63e03dc2fd3b60
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Mon Jun 27 10:55:47 2022 +0200

    topological sorting of definitions by caller hierarchy
2022-07-11 09:09:44 +02:00
Matthias Bisping
94254e1681 Pull request #38: 2.0.0 input output file pattern for download strategy
Merge in RR/pyinfra from 2.0.0-input-output-file-pattern-for-download-strategy to 2.0.0

Squashed commit of the following:

commit c7ce79ebbeace6a8cb7925ed69eda2d7cd2a4783
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jun 24 12:35:29 2022 +0200

    refactor

commit 80f04e544962760adb2dc60c9dd03ccca22167d6
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Fri Jun 24 11:06:10 2022 +0200

    refactoring of component factory, callback and client-pipeline getter

commit 6c024e1a789e1d55f0739c6846e5c02e8b7c943d
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 20:04:10 2022 +0200

    operations section in config cleaned up; added upload formatter

commit c85800aefc224967cea591c1ec4cf1aaa3ac8215
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 19:22:51 2022 +0200

    refactoring; removed obsolete config entries and code

commit 4be125952d82dc868935c8c73ad87fd8f0bd1d6c
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 19:14:47 2022 +0200

    removed obsolete code

commit ac69a5c8e3f1e2fd7e828a17eeab97984f4f9746
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 18:58:41 2022 +0200

    refactoring: rm dl strat module

commit efd36d0fc4f8f36d267bfa9d35415811fe723ccc
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 18:33:51 2022 +0200

    refactoring: multi dl strat -> downloader, rm single dl strat

commit afffdeb993500a6abdb6fe85a549e3d6e97e9ee7
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 16:39:22 2022 +0200

    operations section in config cleaned up

commit 671129af3e343490e0fb277a2b0329aa3027fd73
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Thu Jun 23 16:09:16 2022 +0200

    rename prometheus metric name to include service name

commit 932a3e314b382315492aecab95b1f02f2916f8a6
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 14:43:23 2022 +0200

    cleaned up file descr mngr

commit 79350b4ce71fcd095ed6a5e1d3a598ea246fae53
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 12:26:15 2022 +0200

    refactoring WIP: moving response strategy logic into storage strategy (needs to be refactored as well, later) and file descr mngr. Here the moved code needs to be cleaned up.

commit 7e48c66f0c378b25a433a4034eefdc8a0957e775
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 12:00:48 2022 +0200

    refactoring; removed operation / response folder from output path

commit 8e6cbdaf23c48f6eeb52512b7f382d5727e206d6
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 11:08:37 2022 +0200

    refactoring; added operation -> file pattern mapping to file descr mngr (mainly for self-documentation purposes)

commit 2c80d7cec0cc171e099e5b13aadd2ae0f9bf4f02
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 10:59:57 2022 +0200

    refactoring: introduced input- and output-file specific methods to file descr mngr

commit ecced37150eaac3008cc1b01b235e5f7135e504b
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 10:43:26 2022 +0200

    refactoring

commit 3828341e98861ff8d63035ee983309ad5064bb30
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu Jun 23 10:42:46 2022 +0200

    refactoring

commit 9a7c412523d467af40feb6924823ca89e28aadfe
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Wed Jun 22 17:04:54 2022 +0200

    add prometheus metric name for default operation

commit d207b2e274ba53b2a21a18c367bb130fb05ee1cd
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Wed Jun 22 17:02:55 2022 +0200

    Merge config

commit d3fdf36b12d8def18810454765e731599b833bfc
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Wed Jun 22 17:01:12 2022 +0200

    added fixmes / todos

commit f49d0b9cb7764473ef9d127bc5d88525a4a16a23
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Wed Jun 22 16:28:25 2022 +0200

    update script

... and 47 more commits
2022-06-24 12:59:26 +02:00
Julius Unverfehrt
0d87c60fce parametrize download strategy 2022-06-20 11:43:33 +02:00
Julius Unverfehrt
2dff7d62aa remove duplicate pickup metrics 2022-06-20 10:27:36 +02:00
Julius Unverfehrt
e9424aee04 remove duplicate pickup metrics 2022-06-20 08:29:33 +02:00
Julius Unverfehrt
9d73f42982 remove duplicate pickup metrics 2022-06-20 08:19:51 +02:00
Matthias Bisping
4aef3316a3 renaming 2022-06-15 15:14:17 +02:00
Matthias Bisping
41172d6abb formatting 2022-06-15 15:13:46 +02:00
Matthias Bisping
71af6f703b using function local registry for prometheus 2022-06-15 15:13:10 +02:00
Julius Unverfehrt
a5ff59069a Merge branch '2.0.0' of ssh://git.iqser.com:2222/rr/pyinfra into 2.0.0 2022-06-15 14:56:06 +02:00
Julius Unverfehrt
965d79b08f add prometheus endpoint to analysis server 2022-06-15 14:52:22 +02:00
Matthias Bisping
ca6a2f8d32 fixed docker fixture 2022-06-15 14:14:38 +02:00
Matthias Bisping
86eb3a6f7e enabled docker fixture 2022-06-15 14:01:56 +02:00
Matthias Bisping
87cf1ad189 removed obsolete imports 2022-06-15 14:00:36 +02:00
Matthias Bisping
7865a767c7 added type hint 2022-06-15 14:00:09 +02:00
Matthias Bisping
3897e44378 Merge branch '2.0.0' of ssh://git.iqser.com:2222/rr/pyinfra into 2.0.0 2022-06-15 12:25:55 +02:00
Matthias Bisping
1558398c56 made object name construction logic part of download strategies 2022-06-15 12:25:27 +02:00
Matthias Bisping
8537d4af50 made object name construction logic part of download strategies 2022-06-15 12:02:41 +02:00
Matthias Bisping
116c2b8924 changed default target file extension 2022-06-15 10:31:05 +02:00
Matthias Bisping
45f04590cc removed obsolete code 2022-06-15 10:25:58 +02:00
Matthias Bisping
bb729b6c26 wrapped retry decorator, so retry behaviour can be controlled via config and set to a lower value for tests to save time 2022-06-15 10:25:53 +02:00
Matthias Bisping
24be8d3f13 test config options for logging and docker; changed object name construction 2022-06-15 09:59:47 +02:00
Matthias Bisping
147416bfad pin minio and rabbitmq again 2022-06-14 17:05:33 +02:00
Matthias Bisping
c8fb15b9f7 rm retry decorator on clear_bucket, unpin minio 2022-06-14 17:02:22 +02:00
Matthias Bisping
771df7c78d make bucket before running test; rabbitmq 3.9 again 2022-06-14 16:58:27 +02:00
Matthias Bisping
f9972a95a7 fixed minio version 2022-06-14 16:44:46 +02:00
Matthias Bisping
83e1b5f029 added retry to clear bucket 2022-06-14 16:34:10 +02:00
Matthias Bisping
c1b5cbeb51 logging setup changed 2022-06-14 16:28:45 +02:00
Matthias Bisping
4fcc89f938 s3 backend fixture no longer needs to not come last 2022-06-14 15:40:23 +02:00
Matthias Bisping
d1242aee6c enable docker-compose fixture 2022-06-14 15:33:34 +02:00
Matthias Bisping
0442ecd7b3 Merge branch 'file_extensions_and_index_handler_via_config' of ssh://git.iqser.com:2222/rr/pyinfra into file_extensions_and_index_handler_via_config 2022-06-14 15:33:14 +02:00
Matthias Bisping
e64ade3135 added comments to new config params 2022-06-14 15:33:11 +02:00
Julius Unverfehrt
ace919d078 set xfail for broken tests, set docker-compose rabbitmq version to version running on production server 2022-06-14 15:10:08 +02:00
Matthias Bisping
d179fdede6 consumer test runs again...? 2022-06-14 14:17:08 +02:00
Julius Unverfehrt
9b975b759b set xfail for broken tests 2022-06-14 14:06:06 +02:00
Julius Unverfehrt
c033d98acd adjust test for new return type of visitor, add download strategy parameter to config 2022-06-14 12:33:56 +02:00
Julius Unverfehrt
bb7e631f91 introduce flag to distinguish between server side tests and complete integration tests 2022-06-14 11:56:47 +02:00
Julius Unverfehrt
d8b5be9e72 refactoring 2022-06-14 11:26:46 +02:00
Julius Unverfehrt
2954bbc1ad refactoring 2022-06-14 11:21:31 +02:00
Matthias Bisping
a69f613fe6 completed multi download to single response logic. but broke pipeline test again, maybe? 2022-06-14 11:08:03 +02:00
Matthias Bisping
fa3b08aef5 added dependencies 2022-06-13 15:38:14 +02:00
Matthias Bisping
14ab23b2cc fixed bug in operation wrapper returning a tuple instead of a singleton-iterable with a tuple in one of the return cases. 2022-06-13 15:36:17 +02:00
Matthias Bisping
8a64e5d868 narrowed down the pytest bug: n_items interacts with s3_backend: when n_items has more than one entry, s3_backend must not be the last decorator 2022-06-13 10:36:26 +02:00
Matthias Bisping
051cea3ded found bug in pytest fixture setup causing serve_test to fail (s3 backend fixture function vs s3 backend decorator fixture) 2022-06-13 10:15:33 +02:00
Matthias Bisping
40bc8c2c2c debugging of queue problem, when queue is not consumed by skipping a test configuration WIP 2022-06-13 09:49:10 +02:00
Matthias Bisping
9962651d88 download strategy WIP: added 1 -> n upload logic 2022-06-10 14:06:19 +02:00
Matthias Bisping
249c6203b2 download strategy WIP 2022-06-10 13:26:43 +02:00
Matthias Bisping
3a3c497383 added logic for uploading response files to a folder, which defaults to the name of the operation used 2022-06-10 13:03:40 +02:00
Matthias Bisping
13b6388e5a fixed default identifier type 2022-06-09 16:24:11 +02:00
Matthias Bisping
1940b974b1 added id setting to operation mocks and thus removed need for random seed of randomly seeded hash in storage item identifier 2022-06-09 15:18:41 +02:00
Matthias Bisping
7e46a66698 fixed bug in stream_pages: return -> yield 2022-06-09 14:58:42 +02:00
Matthias Bisping
5b45f5fa15 added dependency 2022-06-08 14:27:34 +02:00
Matthias Bisping
e43504f08d commented out consumer tests. something is wrong with the fixtures, leading to the tests failing when run together with other tests. Consumer functionality is covered by serve_test.py, but dedicated tests should be restored at a later point. 2022-06-08 11:03:50 +02:00
Matthias Bisping
bffaa0786e readded correct consumer test code which git messed up 2022-06-07 15:48:54 +02:00
Matthias Bisping
8d209d63c7 Merge branch 'master' of ssh://git.iqser.com:2222/rr/pyinfra into fixing_consumer_tests 2022-06-07 15:44:39 +02:00
Matthias Bisping
0dee98b23d consumer test adjustment WIP 2022-06-07 15:11:38 +02:00
Matthias Bisping
91701929e5 adjusted stream buffer test for core-operations taking tuples now 2022-06-07 15:08:58 +02:00
Matthias Bisping
c55e41f2d8 refactoring; tweaked json-blob-parser; added standardization case for decodable strings as storage items 2022-06-07 14:55:40 +02:00
Matthias Bisping
f718b2f7ef parser composer checks for either-type 2022-06-03 16:34:37 +02:00
Matthias Bisping
730bdfb220 refactoring 2022-06-03 16:26:12 +02:00
Matthias Bisping
e48fa85784 replaced trial and error logic for parsing blobs in visitor with parser composer instance 2022-06-03 16:18:56 +02:00
Matthias Bisping
6e5af4092e added parsers and parser composer for clean handling of storage blobs in the context of interpreting downloaded blobs 2022-06-03 16:13:15 +02:00
Matthias Bisping
ea2d3223fb parsing strategy error handling for bytes as not an encoded string 2022-06-03 14:55:04 +02:00
Matthias Bisping
26573eeda3 introduced parsing strategy for storage blobs as part of the queue visitor 2022-06-03 14:49:19 +02:00
Matthias Bisping
7730950b50 cleaning up standardization method for downloaded storage items (WIP) 2022-06-03 14:08:40 +02:00
Matthias Bisping
9a47388017 adjusted param fixtures for serve test 2022-06-03 13:48:33 +02:00
Matthias Bisping
8a2b60a8f5 applied black 2022-06-03 13:42:52 +02:00
Matthias Bisping
9232385dea modified core operations to return metadata for better classification mock test 2022-06-03 13:40:59 +02:00
Matthias Bisping
eb81e96400 added extraction test case (with page index) to serving test 2022-06-03 13:13:45 +02:00
Julius Unverfehrt
e7ee0cda42 add compression for storage item before upload, update script for extraction 2022-06-02 15:49:28 +02:00
Julius Unverfehrt
90f8f9da36 update script for extraction 2022-06-02 15:12:33 +02:00
Julius Unverfehrt
c2d7127a84 add log for Consumer Error, fix page index hash function 2022-06-02 14:15:03 +02:00
Matthias Bisping
bfe8bbb8cb reorganized queue message metadata in request 2022-06-01 16:00:46 +02:00
Matthias Bisping
ecff50ae7c submit endpoint is now 'submit' or 'operation' 2022-06-01 11:46:51 +02:00
Matthias Bisping
7a1b215d69 removed obsolete code 2022-06-01 11:37:47 +02:00
Matthias Bisping
01ce914417 generalizing server setup from operations WIP 2022-06-01 10:58:44 +02:00
Matthias Bisping
2b72174605 Merge branch 'integration_test_in_order_develop_aggregation_stratgey' of ssh://git.iqser.com:2222/rr/pyinfra into integration_test_in_order_develop_aggregation_stratgey 2022-05-31 18:40:47 +02:00
Matthias Bisping
586871a26f added queue message body to analysis input dict 2022-05-31 18:40:40 +02:00
Julius Unverfehrt
187055e5eb adjust mock_client script for conversion 2022-05-31 18:19:41 +02:00
Matthias Bisping
3046b4dc26 misc minor fixes while integrating with pdf2image 2022-05-31 17:58:28 +02:00
Matthias Bisping
dd591bd24b removed obsolete code 2022-05-31 16:23:14 +02:00
Matthias Bisping
12fa52f38c removed obsolete code 2022-05-31 16:22:27 +02:00
Matthias Bisping
043fa1ee53 removed obsolete code 2022-05-31 16:22:11 +02:00
Matthias Bisping
1fa6bbdbc6 removed obsolete code 2022-05-31 16:19:37 +02:00
Matthias Bisping
93747d0f63 test param adjustment 2022-05-31 16:14:48 +02:00
Matthias Bisping
dc4f578e94 reorganized serve-test to use only default-objects instead of test-object 2022-05-31 16:13:47 +02:00
Matthias Bisping
93da0d12bb refactoring: added parameter 'n' to consume_and_publish 2022-05-31 15:30:23 +02:00
Matthias Bisping
a688cbd7bd refactoring 2022-05-31 13:57:20 +02:00
Matthias Bisping
0104395790 removed obsolete code 2022-05-31 13:50:44 +02:00
Matthias Bisping
18a9683ddb operation field in queue message WIP 2022-05-31 13:47:40 +02:00
Matthias Bisping
ae2509dc59 modified visitor and queue manager for 1 -> n (1 request to n response messages) 2022-05-30 13:04:12 +02:00
Matthias Bisping
bf9f6ba8e2 tweaked response upload related logic and repaired visitor tests that were broken by new visitor code written for accommodating the aggregation storage strategy 2022-05-30 12:10:14 +02:00
Matthias Bisping
868a53b23f response file path depending on response metadata and request page index complete 2022-05-25 17:37:18 +02:00
Matthias Bisping
2d1ec16714 modified serve test to use components from fixtures; response file path depending on response metadata and request page index WIP 2022-05-25 16:56:08 +02:00
Matthias Bisping
9e2ed6a9f9 fix: data was doubly encoded and hence always triggering the immediate upload path 2022-05-24 14:51:24 +02:00
Matthias Bisping
ab56c9a173 added todo comment for modifying acknowledging logic at some point to allow input buffering to take effect 2022-05-24 14:39:39 +02:00
Matthias Bisping
298d8d3e2c metadata as part of storage item test works 2022-05-23 15:59:56 +02:00
Matthias Bisping
0842ec0d91 metadata as part of storage item WIP 2022-05-23 15:36:18 +02:00
Matthias Bisping
c944cdb1a7 refactoring: splitting source data from encoded data in data fixture 2022-05-23 13:53:50 +02:00
Matthias Bisping
7b998cdaf6 refactoring 2022-05-23 13:14:18 +02:00
Matthias Bisping
426967ee46 refactoring 2022-05-23 11:39:30 +02:00
Matthias Bisping
54ca81d577 moved parameter combination based test skipping into operation factory 2022-05-23 11:28:35 +02:00
Matthias Bisping
13888524fb refactoring 2022-05-23 10:40:28 +02:00
Matthias Bisping
a7ffaeb18f changed return value of file name listing function for storages to return strings of filenames rather than tuples of bucket name and file name 2022-05-23 10:19:07 +02:00
Matthias Bisping
c97393f690 skipping undefined combinations for analysis_task 2022-05-23 10:07:28 +02:00
Matthias Bisping
7ff466e0ea added test for empty return-data operation, like classification 2022-05-23 09:58:43 +02:00
Matthias Bisping
02b0009219 data AND metadata is being uploaded instead of data only 2022-05-18 16:35:18 +02:00
Matthias Bisping
cf13f67394 refactoring: serve_test can now be run with input_data_items like image, pdf etc 2022-05-18 10:56:14 +02:00
Matthias Bisping
0ab86206ec fixed bug introduced by overwriting 'body' as explanatory variable within try-block, which resulted in republish() receiving the parsed body instead of the body as bytes 2022-05-18 10:32:54 +02:00
Matthias Bisping
35542f994c integration test for lazy pipeline 2022-05-18 09:24:12 +02:00
Matthias Bisping
fb712af7c6 storage aggregation strategy working in first version 2022-05-17 22:30:54 +02:00
Matthias Bisping
6cb13051eb fixed the following bugs:
- upper() did yield instead of return
- metadata was not repeated when zipping with results generator
- since test metadata was an empty dict, target data was always empty, since results were zipped with {}
- hence added check for target lengths > 0
- fixed return value of queued stream function dispatcher; only returned first item of 1 -> n results
2022-05-17 21:48:16 +02:00
Matthias Bisping
456cb4157d refactoring: move 2022-05-17 17:27:58 +02:00
Matthias Bisping
6945760045 refactoring: move 2022-05-17 15:59:04 +02:00
Matthias Bisping
47f1d77c03 renaming 2022-05-17 12:12:43 +02:00
Matthias Bisping
fb325ce43d renaming 2022-05-17 12:10:24 +02:00
Matthias Bisping
5590669939 pipeline laziness test works again 2022-05-17 10:44:43 +02:00
Matthias Bisping
9c262e7138 non-rest pipeline works again 2022-05-17 10:27:32 +02:00
Matthias Bisping
e5a4e7e994 applied black 2022-05-16 15:00:09 +02:00
Matthias Bisping
89f562aa71 refactoring: move 2022-05-16 14:58:19 +02:00
Matthias Bisping
1074f44b30 no buffer capacity test; commented out probably dead code -- removing next 2022-05-16 14:34:37 +02:00
Matthias Bisping
96bf831b00 refactoring: move 2022-05-16 13:31:11 +02:00
Matthias Bisping
5d2b71d647 target data fixture and test for flat stream buffer on different data 2022-05-16 13:21:31 +02:00
Matthias Bisping
d12124b2d5 refactoring 2022-05-16 13:04:37 +02:00
Matthias Bisping
7adbdefb4e refactoring: skipping invalid parameter combinations 2022-05-16 12:18:04 +02:00
Matthias Bisping
a1c292a485 refactoring: pulled core operation taking only data out from operation taking data and metadata 2022-05-16 11:53:52 +02:00
Matthias Bisping
948575d199 renaming 2022-05-16 11:43:48 +02:00
Matthias Bisping
2070f300c9 refactoring: queued stream function returns first of generator 2022-05-16 10:28:53 +02:00
Matthias Bisping
092a0e2964 renaming 2022-05-13 17:16:27 +02:00
Matthias Bisping
40777ae609 refactoring: simplifying lazy processor to queued function WIP 2022-05-13 16:59:50 +02:00
Matthias Bisping
2434e0ea55 refactoring; stream buffer tests 2022-05-13 16:38:20 +02:00
Matthias Bisping
08ad83b6a5 renaming 2022-05-13 15:04:01 +02:00
Matthias Bisping
9870aa38d1 renaming 2022-05-13 15:02:05 +02:00
Matthias Bisping
3b7605772e refactoring 2022-05-13 12:42:07 +02:00
Matthias Bisping
1acf16dc91 refactoring: flat stream buffer now takes over stream buffer flushing 2022-05-13 12:29:18 +02:00
Matthias Bisping
c09e5df23e refactoring: introduced flat stream buffer class 2022-05-13 12:13:59 +02:00
Matthias Bisping
bfdce62ccf refactoring; renaming 2022-05-12 19:46:29 +02:00
Matthias Bisping
1552cd10cc refactoring: further simplified queue consuming 2022-05-12 19:26:22 +02:00
Matthias Bisping
8b0c2d4e07 refactoring: further simplified queue consuming function; added one -> many test fixture param 2022-05-12 17:55:45 +02:00
Matthias Bisping
461c0fe6a6 refactoring: simplified queue consuming function 2022-05-12 17:29:20 +02:00
Matthias Bisping
9b5fc4ff77 refactoring: made queued buffer coupler into a function 2022-05-12 17:19:52 +02:00
Matthias Bisping
e3793e5c7c refactoring: split stream processor into two functions; moved queue streaming Nothing check from caller to stream function 2022-05-12 17:12:26 +02:00
Matthias Bisping
1a04dfb426 renaming 2022-05-12 14:42:42 +02:00
Matthias Bisping
e151d2005b refactoring 2022-05-11 17:01:56 +02:00
Matthias Bisping
da2572b8be refactoring 2022-05-11 16:59:41 +02:00
Matthias Bisping
eb8ace4ddd refactoring 2022-05-11 16:38:36 +02:00
Matthias Bisping
096068367f fixed recursion issue 2022-05-11 12:40:03 +02:00
Matthias Bisping
7bd35dce67 refactoring: introduced queue-buffer-coupler; introduced recursion depth issue -- fixing next 2022-05-11 12:39:36 +02:00
Matthias Bisping
1eb4dbc657 refactoring: split queue processor into output buffer and queue streamer 2022-05-11 10:54:27 +02:00
Matthias Bisping
ccf7a7379d refactoring: introduced queue wrapper 2022-05-11 10:44:50 +02:00
Matthias Bisping
b1a318872e refactoring 2022-05-11 10:16:36 +02:00
Matthias Bisping
abc56e6d9f refactoring: factored out queue processor from lazy bufferizer 2022-05-11 10:07:34 +02:00
Matthias Bisping
c8579a8ad0 renaming 2022-05-10 12:49:36 +02:00
Matthias Bisping
c68e19e6e4 refactoring rest stream processor 2022-05-10 12:40:19 +02:00
Matthias Bisping
de0deaa2f4 renaming 2022-05-10 12:19:42 +02:00
Matthias Bisping
83ce7692e6 renaming; adjusted and added tests for lazy bufferize (formerly on-demand processor) 2022-05-10 12:18:20 +02:00
Matthias Bisping
949413af4a fixed bug in compute_next of on demand processor that skipped all but the first return value of 1 -> n functions 2022-05-10 11:07:58 +02:00
Matthias Bisping
3dba896038 removed obsolete imports 2022-05-09 14:52:33 +02:00
Matthias Bisping
453281d48d re-order imports 2022-05-09 14:52:07 +02:00
Matthias Bisping
5b913983eb refactoring: move; added null value param to bufferize 2022-05-09 14:50:54 +02:00
Matthias Bisping
c2ed6d78b7 reintroduced buffering wrapper with Nothing item as flushing signal. buffering is controlled via chunking in the REST receiver on client side 2022-05-09 14:23:12 +02:00
Matthias Bisping
f29bd7d4d3 removed need for bufferize wrapper by composing with first(chunks(...)) and applying to on-demand processor execution chain; broke mock pipeline, fixing next 2022-05-09 11:06:14 +02:00
Matthias Bisping
ec620abf54 lazy pipeline test 2022-05-09 01:17:53 +02:00
Matthias Bisping
5b30a32fff added client pipeline without webserver backend 2022-05-09 00:43:21 +02:00
Matthias Bisping
c092e7bcab refactoring; server pipeline WIP 2022-05-08 18:36:15 +02:00
Matthias Bisping
1d09337378 endpoint suffixes passed to stream processor 2022-05-08 17:28:55 +02:00
Matthias Bisping
8c1ad64464 refactoring; additional buffer test 2022-05-07 00:29:58 +02:00
Matthias Bisping
1e21913e37 processor test; refactoring 2022-05-07 00:23:22 +02:00
Matthias Bisping
132a1a1b50 renaming 2022-05-06 23:46:21 +02:00
Matthias Bisping
f99d779c29 refactoring 2022-05-06 23:39:29 +02:00
Matthias Bisping
f428372511 renaming 2022-05-06 23:19:25 +02:00
Matthias Bisping
54359501f9 renaming 2022-05-06 23:19:03 +02:00
Matthias Bisping
cbba561116 replaced eager endpoint in sender test 2022-05-06 23:18:37 +02:00
Matthias Bisping
1daaf2b904 refactoring 2022-05-06 23:13:48 +02:00
Matthias Bisping
866df2dee3 renaming 2022-05-06 23:07:46 +02:00
Matthias Bisping
962b398a9c refactoring: added processor adapter and streamer 2022-05-06 22:49:59 +02:00
Matthias Bisping
3ae4fd8986 refactoring / formatting 2022-05-06 19:57:33 +02:00
Matthias Bisping
98af600787 fixed result validation case for Nothing value 2022-05-06 19:57:05 +02:00
Matthias Bisping
ec650464a8 bufferize test 2022-05-06 19:56:43 +02:00
Matthias Bisping
ed69011bf6 removed commented-out lines 2022-05-06 19:16:17 +02:00
Matthias Bisping
0fc3db2fae test params shown with names in pytest log 2022-05-06 19:15:44 +02:00
Matthias Bisping
53eee983c4 added result validation for processor 2022-05-06 19:15:17 +02:00
Matthias Bisping
5760a6f354 fixed buffer issue: buffer can overflow when called lazily, for some reason. looking into it next. 2022-05-06 19:15:02 +02:00
Matthias Bisping
dca3eaaa54 fixed buffering and result streaming: all items are yielded individually and computed on demand now 2022-05-06 12:14:09 +02:00
Matthias Bisping
1b04c46853 removed code related to eager endpoint 2022-05-05 17:25:42 +02:00
Matthias Bisping
68c24c863f removed eager endpoint (/process) 2022-05-05 17:21:47 +02:00
Matthias Bisping
4a3ac150cf fixed initial computation queue state 2022-05-05 16:42:43 +02:00
Matthias Bisping
373c38113f refactoring 2022-05-05 16:26:18 +02:00
Matthias Bisping
b58a9d11c3 refactoring: added processor class 2022-05-05 16:08:59 +02:00
Matthias Bisping
bd5fe82e06 refactoring: rename 2022-05-05 15:47:25 +02:00
Matthias Bisping
b62652957a refactoring: pipeline 2022-05-05 15:15:05 +02:00
Matthias Bisping
7a3bb9334b removed obsolete code 2022-05-05 14:26:46 +02:00
Matthias Bisping
7ccad043f4 refactoring: rename 2022-05-05 14:25:24 +02:00
Matthias Bisping
4fd6a5aa2a docstring update 2022-05-05 12:52:40 +02:00
Matthias Bisping
a7a44267f1 refactoring: added pipeline class 2022-05-05 12:49:49 +02:00
Matthias Bisping
685edaa62f removed obsolete imports 2022-05-05 11:57:29 +02:00
Matthias Bisping
82d7b7f8cb refactoring: simplify pickup endpoint extraction 2022-05-05 11:56:06 +02:00
Matthias Bisping
d4ffd75e26 added rest callback interpreter 2022-05-05 11:45:52 +02:00
Matthias Bisping
7a1db32c3b refactoring: rename 2022-05-05 11:22:17 +02:00
Matthias Bisping
456fb1db06 refactoring: move 2022-05-05 10:23:51 +02:00
Matthias Bisping
e221b00933 refactoring: Sender, Receiver 2022-05-05 10:17:38 +02:00
Matthias Bisping
24313241a8 sync 2022-05-04 17:46:35 +02:00
Matthias Bisping
ef0e805223 sender baseclass 2022-05-04 17:05:44 +02:00
Matthias Bisping
531ff8d3e0 refactoring; fixed sender 2022-05-04 16:57:08 +02:00
Matthias Bisping
14d83abd72 refactoring; rest sender 2022-05-04 16:46:24 +02:00
Matthias Bisping
00ea224379 refactoring; packer test; sender test 2022-05-04 16:41:29 +02:00
Matthias Bisping
625552ec7c packing and bundling test params 2022-05-04 15:19:52 +02:00
Matthias Bisping
8afd87e44f added packing and bundling test 2022-05-04 15:18:15 +02:00
Matthias Bisping
1d70fb628e removed debug prints 2022-05-04 14:58:22 +02:00
Matthias Bisping
f32004c3a4 added packer test 2022-05-04 14:57:40 +02:00
Matthias Bisping
a301876ab9 refactoring: metadata argument as iterable instead of dict 2022-05-04 14:55:04 +02:00
Matthias Bisping
35045128b4 renaming 2022-05-04 13:51:05 +02:00
Matthias Bisping
463e1b2024 corrected imports 2022-05-04 12:14:30 +02:00
Matthias Bisping
630ed51b27 refactoring 2022-05-04 10:52:19 +02:00
Matthias Bisping
a4079f6710 refactoring 2022-05-04 10:45:13 +02:00
Matthias Bisping
cf0a877569 refactoring 2022-05-04 10:37:46 +02:00
Matthias Bisping
7d8659f257 topological sorting of definitions by caller hierarchy 2022-05-03 18:21:51 +02:00
Matthias Bisping
51a6bf9875 refactoring 2022-05-03 18:06:28 +02:00
Matthias Bisping
85d7ad52dc refactoring 2022-05-03 18:02:53 +02:00
Matthias Bisping
c00973b676 removed debug prints 2022-05-03 17:56:33 +02:00
Matthias Bisping
16fa992cae pickup endpoint working 2022-05-03 17:52:13 +02:00
Matthias Bisping
c315247625 parametrized number of pages for pdf fixture 2022-05-03 15:47:11 +02:00
Matthias Bisping
29fb0dda30 improved 1 -> n test and explanation 2022-05-03 15:43:30 +02:00
Matthias Bisping
ae39ccc8e2 renaming 2022-05-03 14:32:35 +02:00
Matthias Bisping
8575567890 refactoring 2022-05-03 14:25:01 +02:00
Matthias Bisping
92190a42f0 refactoring: move 2022-05-03 09:56:12 +02:00
Matthias Bisping
ea9b405d2a signature harmonization for 1 -> 1 and 1 -> n completed 2022-05-02 15:50:14 +02:00
Matthias Bisping
77f23a2185 data nesting harmonized for 1 -> 1 and 1 -> n; pdf page extraction (1 -> n) working for non-batched usage 2022-04-29 17:22:54 +02:00
Matthias Bisping
3172a00aaa refactoring 2022-04-29 17:08:34 +02:00
Matthias Bisping
501f0bd5fc signature harmonization for 1 -> 1 and 1 -> n WIP: batched and not batched working again 2022-04-29 16:46:25 +02:00
Matthias Bisping
fd57261631 signature harmonization for 1 -> 1 and 1 -> n WIP 2022-04-29 15:43:20 +02:00
Matthias Bisping
23070f3480 changed operation signature to return iterables for 1 -> n compatibility 2022-04-29 13:34:27 +02:00
Matthias Bisping
2550a0eff2 refactoring 2022-04-29 12:34:54 +02:00
Matthias Bisping
f053a072d6 signatures for services updated 2022-04-29 12:01:13 +02:00
Matthias Bisping
00276dbcc7 added batching wrapper for internally batching functions 2022-04-29 10:35:55 +02:00
Matthias Bisping
bc0e9ed643 renaming 2022-04-29 09:51:41 +02:00
Matthias Bisping
940bc3a689 changed buffering function behaviour: applies function to buffer. function needs to be lifted from the outside if single items are to be processed. 2022-04-28 21:58:38 +02:00
Matthias Bisping
a999ce2c3b refactoring 2022-04-28 21:36:42 +02:00
Matthias Bisping
e8315ffea9 signature of process_fn changed. Now returns {'data': ..., 'metadata': ...} instead of {'data': ...} 2022-04-28 21:12:25 +02:00
Matthias Bisping
087b5af929 refactoring 2022-04-28 18:28:35 +02:00
Matthias Bisping
f47d458217 send json instead of data 2022-04-28 18:05:07 +02:00
Matthias Bisping
0d503d1c1d refactoring 2022-04-28 14:30:50 +02:00
Matthias Bisping
3b0d0868b9 refactoring: method dispatch via peekable rather than special empty data request 2022-04-28 13:01:01 +02:00
Matthias Bisping
da84ff5112 buffer size constrained by assertion 2022-04-27 18:45:14 +02:00
Matthias Bisping
c9f26000d7 test parametrization for number of input items and buffer size 2022-04-27 18:38:43 +02:00
Matthias Bisping
67c4bac4b7 sync 2022-04-27 17:45:48 +02:00
Matthias Bisping
ab5839a126 refactoring; extended partial posting by image payload data 2022-04-27 16:29:52 +02:00
Matthias Bisping
fa4e5e5e0e refactoring; made test dynamic relative to import 2022-04-27 14:15:52 +02:00
Matthias Bisping
e58addf8c4 refactoring 2022-04-27 14:09:18 +02:00
Matthias Bisping
00e21b00ba refactoring 2022-04-27 14:02:41 +02:00
Matthias Bisping
fabc78efce refactoring; formatting 2022-04-27 13:41:48 +02:00
Matthias Bisping
90af62ed2c refactoring 2022-04-27 13:38:55 +02:00
Matthias Bisping
2af648254e formatting 2022-04-27 10:13:46 +02:00
Matthias Bisping
9e8172427c refactoring 2022-04-27 10:08:57 +02:00
Matthias Bisping
e903c69a07 refactoring 2022-04-27 09:13:19 +02:00
Matthias Bisping
7419612c21 partial request by manual receiver buffering V1 2022-04-26 19:54:37 +02:00
Julius Unverfehrt
656bc7cd63 explore partial responses 2022-04-26 16:34:58 +02:00
Julius Unverfehrt
f6ca9c9ac5 explore partial responses 2022-04-26 16:17:50 +02:00
Matthias Bisping
5a948ef7ad partial response test WIP 2022-04-26 16:15:54 +02:00
Matthias Bisping
4078b3e4ec signatures for services 2022-04-26 14:37:12 +02:00
Matthias Bisping
b7882d4452 refactoring: introduced sub-conftest files 2022-04-26 13:13:26 +02:00
Matthias Bisping
01daa634ec refactoring: made docker-compose api call non-autousing 2022-04-26 13:01:15 +02:00
Matthias Bisping
64624a3fd3 refactoring: moved conftest up a dir 2022-04-26 12:52:24 +02:00
Matthias Bisping
afd67d87a6 updated test container dockerfile for new location of tests directory 2022-04-26 12:48:15 +02:00
Matthias Bisping
37881da08e restructuring: moved test out of module scope 2022-04-26 12:45:12 +02:00
125 changed files with 3287 additions and 279 deletions

View File

@ -36,8 +36,6 @@ A configuration is located in `/config.yaml`. All relevant variables can be conf
{
"dossierId": "",
"fileId": "",
"targetFileExtension": "",
"responseFileExtension": ""
}
```

View File

@ -1,5 +1,52 @@
service:
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger
name: $SERVICE_NAME|research # Default service name for research service, used for prometheus metric name
response_formatter: default # formats analysis payloads of response messages
upload_formatter: projecting # formats analysis payloads of objects uploaded to storage
# Note: This is not really the right place for this. It should be configured on a per-service basis.
operation: $OPERATION|default
# operation needs to be specified in deployment config for services that are called without an operation specified
operations:
conversion:
input:
multi: False
subdir: ""
extension: ORIGIN.pdf.gz
output:
subdir: "pages_as_images"
extension: json.gz
extraction:
input:
multi: False
subdir: ""
extension: ORIGIN.pdf.gz
output:
subdir: "extracted_images"
extension: json.gz
table_parsing:
input:
multi: True
subdir: "pages_as_images"
extension: json.gz
output:
subdir: "table_parses"
extension: json.gz
image_classification:
input:
multi: True
subdir: "extracted_images"
extension: json.gz
output:
subdir: ""
extension: IMAGE_INFO.json.gz
default:
input:
multi: False
subdir: ""
extension: in.gz
output:
subdir: ""
extension: out.gz
probing_webserver:
host: $PROBING_WEBSERVER_HOST|"0.0.0.0" # Probe webserver address
@ -33,3 +80,8 @@ storage:
azure:
connection_string: $STORAGE_AZURECONNECTIONSTRING|"DefaultEndpointsProtocol=https;AccountName=iqserdevelopment;AccountKey=4imAbV9PYXaztSOMpIyAClg88bAZCXuXMGJG0GA1eIBpdh2PlnFGoRBnKqLy2YZUSTmZ3wJfC7tzfHtuC6FEhQ==;EndpointSuffix=core.windows.net"
retry:
tries: 3
delay: 5
jitter: [1, 3]
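
The `operations` section above maps each operation to input/output file patterns (subdir, extension, and whether multiple input objects are expected). Below is a minimal illustrative sketch of such a lookup; the dict literal mirrors two entries of the YAML above, and the fallback to the `default` entry is an assumption for illustration, not taken from the repository.

```python
# Minimal sketch: resolving an operation's file pattern from the config above.
OPERATIONS = {
    "table_parsing": {
        "input": {"multi": True, "subdir": "pages_as_images", "extension": "json.gz"},
        "output": {"subdir": "table_parses", "extension": "json.gz"},
    },
    "default": {
        "input": {"multi": False, "subdir": "", "extension": "in.gz"},
        "output": {"subdir": "", "extension": "out.gz"},
    },
}

def file_pattern(operation: str, end: str = "input") -> dict:
    # Unknown operations fall back to the 'default' entry (assumed behaviour).
    return OPERATIONS.get(operation, OPERATIONS["default"])[end]

assert file_pattern("table_parsing")["subdir"] == "pages_as_images"
assert file_pattern("unknown", end="output")["extension"] == "out.gz"
```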

doc/signatures.txt Normal file (76 lines)
View File

@ -0,0 +1,76 @@
Processing service interface
image classification now : JSON (Mdat PDF) -> (Data PDF -> JSON [Mdat ImObj])
image classification future: JSON [Mdat FunkIm] | Mdat PDF -> (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection : JSON [Mdat PagIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat SemIm]])
NER : JSON [Mdat Dict] -> (Data [Dict] -> JSON [Mdat])
table parsing : JSON [Mdat FunkIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
pdf2image : Mdat (fn, [Int], PDF) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])
image classification now : Mdat (fn, [Int], file) -> (Data PDF -> JSON [Mdat ImObj])
image classification future: Mdat (fn, [Int], dir) -> (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])
from funcy import identity
access(mdat):
if mdat.path is file:
request = {"data": load(mdat.path), "metadata": mdat}
elif mdat.path is dir:
get_indexed = identity if not mdat.idx else itemgetter(*mdat.idx)
request = {"data": get_indexed(get_files(mdat.path)), "metadata": mdat}
else:
raise BadRequest
storage:
fileId: {
pages: [PagIm]
images: [FunkIm]
sections: gz
}
---------------
assert if targetPath is file then response list must be singleton
{index: [], dir: fileID.pdf.gz, targetPath: fileID.images.json.gz} -> [{data: pdf bytes, metadata: request: ...] -> [{data: null, metadata: request: null, response: {classification infos: ...}]
image classification now : Mdat (fn, [Int], file) -> [JSON (Data PDF, Mdat)] -> [JSON (Data null, Mdat [ImObj])] | 1 -> 1
assert if targetPath is file then response list must be singleton
{index: [], dir: fileID/images, targetPath: fileID.images.json.gz} -> [{data: image bytes, metadata: request: {image location...}] -> [{data: null, metadata: request: null, response: {classification infos: ...}]
image classification future: Mdat (fn, [Int], dir) -> JSON (Data [FunkIm], Mdat) -> [JSON (Data null, Mdat [FunkIm])] |
object detection : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])
aggregate <==> targetpath is file and index is empty

View File

@ -1,7 +1,7 @@
version: '2'
services:
minio:
image: minio/minio
image: minio/minio:RELEASE.2022-06-11T19-55-32Z
ports:
- "9000:9000"
environment:
@ -12,7 +12,7 @@ services:
command: server /data
network_mode: "bridge"
rabbitmq:
image: docker.io/bitnami/rabbitmq:3.9
image: docker.io/bitnami/rabbitmq:3.9.8
ports:
- '4369:4369'
- '5551:5551'

pyinfra/callback.py Normal file (65 lines)
View File

@ -0,0 +1,65 @@
import logging
from funcy import merge, omit, lmap
from pyinfra.exceptions import AnalysisFailure
from pyinfra.pipeline_factory import CachedPipelineFactory
logger = logging.getLogger(__name__)
class Callback:
"""This is the callback that is applied to items pulled from the storage. It forwards these items to an analysis
endpoint.
"""
def __init__(self, pipeline_factory: CachedPipelineFactory):
self.pipeline_factory = pipeline_factory
def __get_pipeline(self, endpoint):
return self.pipeline_factory.get_pipeline(endpoint)
@staticmethod
def __run_pipeline(pipeline, analysis_input: dict):
"""
TODO: Since data and metadata are passed as singletons, there is no buffering and hence no batching happening
within the pipeline. However, the queue acknowledgment logic needs to be changed in order to facilitate
passing non-singletons, to only ack a message once a response is pulled from the output queue of the
pipeline. Probably the pipeline return value needs to contain the queue message frame (or so), in order for
the queue manager to tell which message to ack.
TODO: casting list (lmap) on `analysis_response_stream` is a temporary solution, while the client pipeline
operates on singletons ([data], [metadata]).
"""
def combine_storage_item_metadata_with_queue_message_metadata(analysis_input):
return merge(analysis_input["metadata"], omit(analysis_input, ["data", "metadata"]))
def remove_queue_message_metadata(analysis_result):
metadata = omit(analysis_result["metadata"], queue_message_keys(analysis_input))
return {**analysis_result, "metadata": metadata}
def queue_message_keys(analysis_input):
return {*analysis_input.keys()}.difference({"data", "metadata"})
try:
data = analysis_input["data"]
metadata = combine_storage_item_metadata_with_queue_message_metadata(analysis_input)
analysis_response_stream = pipeline([data], [metadata])
analysis_response_stream = lmap(remove_queue_message_metadata, analysis_response_stream)
return analysis_response_stream
except Exception as err:
logger.error(err)
raise AnalysisFailure from err
def __call__(self, analysis_input: dict):
"""data_metadata_pack: {'dossierId': ..., 'fileId': ..., 'pages': ..., 'operation': ...}"""
operation = analysis_input.get("operation", "")
pipeline = self.__get_pipeline(operation)
try:
logging.debug(f"Requesting analysis for operation '{operation}'...")
return self.__run_pipeline(pipeline, analysis_input)
except AnalysisFailure:
logging.warning(f"Exception caught when calling analysis endpoint for operation '{operation}'.")

View File

@ -0,0 +1,120 @@
import logging
from functools import lru_cache
from funcy import project, identity, rcompose
from pyinfra.callback import Callback
from pyinfra.config import parse_disjunction_string
from pyinfra.file_descriptor_builder import RedFileDescriptorBuilder
from pyinfra.file_descriptor_manager import FileDescriptorManager
from pyinfra.pipeline_factory import CachedPipelineFactory
from pyinfra.queue.consumer import Consumer
from pyinfra.queue.queue_manager.pika_queue_manager import PikaQueueManager
from pyinfra.server.client_pipeline import ClientPipeline
from pyinfra.server.dispatcher.dispatchers.rest import RestDispatcher
from pyinfra.server.interpreter.interpreters.rest_callback import RestPickupStreamer
from pyinfra.server.packer.packers.rest import RestPacker
from pyinfra.server.receiver.receivers.rest import RestReceiver
from pyinfra.storage import storages
from pyinfra.visitor import QueueVisitor
from pyinfra.visitor.downloader import Downloader
from pyinfra.visitor.response_formatter.formatters.default import DefaultResponseFormatter
from pyinfra.visitor.response_formatter.formatters.identity import IdentityResponseFormatter
from pyinfra.visitor.strategies.response.aggregation import AggregationStorageStrategy, ProjectingUploadFormatter
logger = logging.getLogger(__name__)
class ComponentFactory:
def __init__(self, config):
self.config = config
@lru_cache(maxsize=None)
def get_consumer(self, callback=None):
callback = callback or self.get_callback()
return Consumer(self.get_visitor(callback), self.get_queue_manager())
@lru_cache(maxsize=None)
def get_callback(self, analysis_base_url=None):
analysis_base_url = analysis_base_url or self.config.rabbitmq.callback.analysis_endpoint
callback = Callback(CachedPipelineFactory(base_url=analysis_base_url, pipeline_factory=self.get_pipeline))
def wrapped(body):
body_repr = project(body, ["dossierId", "fileId", "operation"])
logger.info(f"Processing {body_repr}...")
result = callback(body)
logger.info(f"Completed processing {body_repr}...")
return result
return wrapped
@lru_cache(maxsize=None)
def get_visitor(self, callback):
return QueueVisitor(
callback=callback,
data_loader=self.get_downloader(),
response_strategy=self.get_response_strategy(),
response_formatter=self.get_response_formatter(),
)
@lru_cache(maxsize=None)
def get_queue_manager(self):
return PikaQueueManager(self.config.rabbitmq.queues.input, self.config.rabbitmq.queues.output)
@staticmethod
@lru_cache(maxsize=None)
def get_pipeline(endpoint):
return ClientPipeline(
RestPacker(), RestDispatcher(endpoint), RestReceiver(), rcompose(RestPickupStreamer(), RestReceiver())
)
@lru_cache(maxsize=None)
def get_storage(self):
return storages.get_storage(self.config.storage.backend)
@lru_cache(maxsize=None)
def get_response_strategy(self, storage=None):
return AggregationStorageStrategy(
storage=storage or self.get_storage(),
file_descriptor_manager=self.get_file_descriptor_manager(),
upload_formatter=self.get_upload_formatter(),
)
@lru_cache(maxsize=None)
def get_file_descriptor_manager(self):
return FileDescriptorManager(
bucket_name=parse_disjunction_string(self.config.storage.bucket),
file_descriptor_builder=self.get_operation_file_descriptor_builder(),
)
@lru_cache(maxsize=None)
def get_upload_formatter(self):
return {"identity": identity, "projecting": ProjectingUploadFormatter()}[self.config.service.upload_formatter]
@lru_cache(maxsize=None)
def get_operation_file_descriptor_builder(self):
return RedFileDescriptorBuilder(
operation2file_patterns=self.get_operation2file_patterns(),
default_operation_name=self.config.service.operation,
)
@lru_cache(maxsize=None)
def get_response_formatter(self):
return {"default": DefaultResponseFormatter(), "identity": IdentityResponseFormatter()}[
self.config.service.response_formatter
]
@lru_cache(maxsize=None)
def get_operation2file_patterns(self):
if self.config.service.operation != "default":
self.config.service.operations["default"] = self.config.service.operations[self.config.service.operation]
return self.config.service.operations
@lru_cache(maxsize=None)
def get_downloader(self, storage=None):
return Downloader(
storage=storage or self.get_storage(),
bucket_name=parse_disjunction_string(self.config.storage.bucket),
file_descriptor_manager=self.get_file_descriptor_manager(),
)
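
A hedged sketch of how the factory above might be wired at service start-up; only names shown in this diff are used, and what happens after `get_consumer()` is not shown here.

```python
# Sketch only: wiring the component factory with the module-level CONFIG
# defined in pyinfra/config.py further below.
from pyinfra.config import CONFIG
from pyinfra.component_factory import ComponentFactory

factory = ComponentFactory(CONFIG)
consumer = factory.get_consumer()  # Consumer(QueueVisitor(...), PikaQueueManager(...))
```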

View File

@ -1,10 +1,13 @@
"""Implements a config object with dot-indexing syntax."""
import os
from functools import partial
from itertools import chain
from operator import truth
from typing import Iterable
from envyaml import EnvYAML
from funcy import first, juxt, butlast, last
from frozendict import frozendict
from funcy import first, juxt, butlast, last, lmap
from pyinfra.locations import CONFIG_FILE
@ -27,6 +30,9 @@ class DotIndexable:
def __getitem__(self, item):
return self.__getattr__(item)
def __setitem__(self, key, value):
self.x[key] = value
class Config:
def __init__(self, config_path):
@ -39,6 +45,29 @@ class Config:
def __getitem__(self, item):
return self.__getattr__(item)
def __setitem__(self, key, value):
self.__config.key = value
def to_dict(self, frozen=True):
return to_dict(self.__config.export(), frozen=frozen)
def __hash__(self):
return hash(self.to_dict())
def to_dict(v, frozen=True):
def make_dict(*args, **kwargs):
return (frozendict if frozen else dict)(*args, **kwargs)
if isinstance(v, list):
return tuple(map(partial(to_dict, frozen=frozen), v))
elif isinstance(v, DotIndexable):
return make_dict({k: to_dict(v, frozen=frozen) for k, v in v.x.items()})
elif isinstance(v, dict):
return make_dict({k: to_dict(v, frozen=frozen) for k, v in v.items()})
else:
return v
CONFIG = Config(CONFIG_FILE)
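
A sketch of why `Config` gains `to_dict`/`__hash__` via `frozendict` (an assumption, consistent with `get_component_factory` further below): a hashable config can serve as an `lru_cache` key, so one factory is built per distinct configuration.

```python
# Assumption for illustration: hashable (frozen) configs as cache keys.
from functools import lru_cache
from frozendict import frozendict

@lru_cache(maxsize=None)
def get_component_factory(config):
    return object()  # stand-in for ComponentFactory(config)

cfg = frozendict({"storage": frozendict({"backend": "s3"})})
assert get_component_factory(cfg) is get_component_factory(cfg)  # one factory per config
```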

View File

@ -0,0 +1,8 @@
from functools import lru_cache
from pyinfra.component_factory import ComponentFactory
@lru_cache(maxsize=None)
def get_component_factory(config):
return ComponentFactory(config)

View File

@ -32,3 +32,19 @@ class NoSuchContainer(KeyError):
class IntentionalTestException(RuntimeError):
pass
class UnexpectedItemType(ValueError):
pass
class NoBufferCapacity(ValueError):
pass
class InvalidMessage(ValueError):
pass
class InvalidStorageItemFormat(ValueError):
pass

View File

@ -0,0 +1,99 @@
import abc
import os
from operator import itemgetter
from funcy import project
class FileDescriptorBuilder:
@abc.abstractmethod
def build_file_descriptor(self, queue_item_body, end="input"):
raise NotImplementedError
@abc.abstractmethod
def build_matcher(self, file_descriptor):
raise NotImplementedError
@staticmethod
@abc.abstractmethod
def build_storage_upload_info(analysis_payload, request_metadata):
raise NotImplementedError
@abc.abstractmethod
def get_path_prefix(self, queue_item_body):
raise NotImplementedError
class RedFileDescriptorBuilder(FileDescriptorBuilder):
"""Defines concrete descriptors for storage objects based on queue messages"""
def __init__(self, operation2file_patterns, default_operation_name):
self.operation2file_patterns = operation2file_patterns or self.get_default_operation2file_patterns()
self.default_operation_name = default_operation_name
@staticmethod
def get_default_operation2file_patterns():
return {"default": {"input": {"subdir": "", "extension": ".in"}, "output": {"subdir": "", "extension": ".out"}}}
def build_file_descriptor(self, queue_item_body, end="input"):
def pages():
if end == "input":
if "id" in queue_item_body:
return [queue_item_body["id"]]
else:
return queue_item_body["pages"] if file_pattern["multi"] else []
elif end == "output":
return [queue_item_body["id"]]
else:
raise ValueError(f"Invalid argument: {end=}") # TODO: use an enum for `end`
operation = queue_item_body.get("operation", self.default_operation_name)
file_pattern = self.operation2file_patterns[operation][end]
file_descriptor = {
**project(queue_item_body, ["dossierId", "fileId", "pages"]),
"pages": pages(),
"extension": file_pattern["extension"],
"subdir": file_pattern["subdir"],
}
return file_descriptor
def build_matcher(self, file_descriptor):
def make_filename(file_id, subdir, suffix):
return os.path.join(file_id, subdir, suffix) if subdir else f"{file_id}.{suffix}"
dossier_id, file_id, subdir, pages, extension = itemgetter(
"dossierId", "fileId", "subdir", "pages", "extension"
)(file_descriptor)
matcher = os.path.join(
dossier_id, make_filename(file_id, subdir, self.__build_page_regex(pages, subdir) + extension)
)
return matcher
@staticmethod
def __build_page_regex(pages, subdir):
n_pages = len(pages)
if n_pages > 1:
page_re = "id:(" + "|".join(map(str, pages)) + ")."
elif n_pages == 1:
page_re = f"id:{pages[0]}."
else: # no pages specified -> either all pages or no pages, depending on whether a subdir is specified
page_re = r"id:\d+." if subdir else ""
return page_re
@staticmethod
def build_storage_upload_info(analysis_payload, request_metadata):
storage_upload_info = {**request_metadata, "id": analysis_payload["metadata"].get("id", 0)}
return storage_upload_info
def get_path_prefix(self, queue_item_body):
prefix = "/".join(itemgetter("dossierId", "fileId")(self.build_file_descriptor(queue_item_body, end="input")))
return prefix
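
A worked example for the `RedFileDescriptorBuilder` above, using hypothetical dossier/file ids and the `table_parsing` pattern from the configuration shown earlier:

```python
# Illustrative values only; the builder class is the one defined above.
builder = RedFileDescriptorBuilder(
    operation2file_patterns={
        "table_parsing": {
            "input": {"multi": True, "subdir": "pages_as_images", "extension": "json.gz"},
            "output": {"subdir": "table_parses", "extension": "json.gz"},
        }
    },
    default_operation_name="default",
)
body = {"dossierId": "d1", "fileId": "f1", "pages": [1, 2], "operation": "table_parsing"}
descriptor = builder.build_file_descriptor(body, end="input")
builder.build_matcher(descriptor)  # 'd1/f1/pages_as_images/id:(1|2).json.gz' (POSIX join)
```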

View File

@ -0,0 +1,63 @@
from pyinfra.file_descriptor_builder import FileDescriptorBuilder
class FileDescriptorManager:
"""Decorates a file descriptor builder with additional convenience functionality and this way provides a
comprehensive interface for all file descriptor related operations, while the concrete descriptor logic is
implemented in a file descriptor builder.
TODO: This is supposed to be fully decoupled from the concrete file descriptor builder implementation, however some
bad coupling is still left.
"""
def __init__(self, bucket_name, file_descriptor_builder: FileDescriptorBuilder):
self.bucket_name = bucket_name
self.operation_file_descriptor_builder = file_descriptor_builder
def get_input_object_name(self, queue_item_body: dict):
return self.get_object_name(queue_item_body, end="input")
def get_output_object_name(self, queue_item_body: dict):
return self.get_object_name(queue_item_body, end="output")
def get_object_name(self, queue_item_body: dict, end):
file_descriptor = self.build_file_descriptor(queue_item_body, end=end)
object_name = self.__build_matcher(file_descriptor)
return object_name
def build_file_descriptor(self, queue_item_body, end="input"):
return self.operation_file_descriptor_builder.build_file_descriptor(queue_item_body, end=end)
def build_input_matcher(self, queue_item_body):
return self.build_matcher(queue_item_body, end="input")
def build_output_matcher(self, queue_item_body):
return self.build_matcher(queue_item_body, end="output")
def build_matcher(self, queue_item_body, end):
file_descriptor = self.build_file_descriptor(queue_item_body, end=end)
return self.__build_matcher(file_descriptor)
def __build_matcher(self, file_descriptor):
return self.operation_file_descriptor_builder.build_matcher(file_descriptor)
def get_input_object_descriptor(self, queue_item_body):
return self.get_object_descriptor(queue_item_body, end="input")
def get_output_object_descriptor(self, storage_upload_info):
return self.get_object_descriptor(storage_upload_info, end="output")
def get_object_descriptor(self, queue_item_body, end):
# TODO: this is complected with the Storage class API
# FIXME: bad coupling
return {
"bucket_name": self.bucket_name,
"object_name": self.get_object_name(queue_item_body, end=end),
}
def build_storage_upload_info(self, analysis_payload, request_metadata):
return self.operation_file_descriptor_builder.build_storage_upload_info(analysis_payload, request_metadata)
def get_path_prefix(self, queue_item_body):
return self.operation_file_descriptor_builder.get_path_prefix(queue_item_body)

View File

@ -6,8 +6,7 @@ from waitress import serve
from pyinfra.config import CONFIG
logger = logging.getLogger(__file__)
logger.setLevel(CONFIG.service.logging_level)
logger = logging.getLogger()
def run_probing_webserver(app, host=None, port=None, mode=None):

View File

View File

@ -0,0 +1,14 @@
import abc
class ParsingError(Exception):
pass
class BlobParser(abc.ABC):
@abc.abstractmethod
def parse(self, blob: bytes):
pass
def __call__(self, blob: bytes):
return self.parse(blob)

View File

@ -0,0 +1,67 @@
import logging
from funcy import rcompose
from pyinfra.parser.blob_parser import ParsingError
logger = logging.getLogger(__name__)
class Either:
def __init__(self, item):
self.item = item
def bind(self):
return self.item
class Left(Either):
pass
class Right(Either):
pass
class EitherParserWrapper:
def __init__(self, parser):
self.parser = parser
def __log(self, result):
if isinstance(result, Right):
logger.log(logging.DEBUG - 5, f"{self.parser.__class__.__name__} succeeded or forwarded on {result.bind()}")
else:
logger.log(logging.DEBUG - 5, f"{self.parser.__class__.__name__} failed on {result.bind()}")
return result
def parse(self, item: Either):
if isinstance(item, Left):
try:
return Right(self.parser(item.bind()))
except ParsingError:
return item
elif isinstance(item, Right):
return item
else:
return self.parse(Left(item))
def __call__(self, item):
return self.__log(self.parse(item))
class EitherParserComposer:
def __init__(self, *parsers):
self.parser = rcompose(*map(EitherParserWrapper, parsers))
def parse(self, item):
result = self.parser(item)
if isinstance(result, Right):
return result.bind()
else:
raise ParsingError("All parsers failed.")
def __call__(self, item):
return self.parse(item)
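The composer tries each wrapped parser in turn: a Left carries a not-yet-parsed blob, a Right carries a successful result that later parsers simply forward. A minimal usage sketch, assuming the pyinfra.parser module paths imported elsewhere in this change set; the sample payloads are made up for illustration.
from pyinfra.parser.parser_composer import EitherParserComposer
from pyinfra.parser.parsers.identity import IdentityBlobParser
from pyinfra.parser.parsers.json import JsonBlobParser
from pyinfra.parser.parsers.string import StringBlobParser

# Parsers are tried left to right; the first one that does not raise
# ParsingError produces a Right, which every later parser forwards untouched.
parse = EitherParserComposer(JsonBlobParser(), StringBlobParser(), IdentityBlobParser())

print(parse(b'{"data": "aGVsbG8=", "metadata": {}}'))  # handled by JsonBlobParser (base64 payload decoded)
print(parse(b"plain text"))                            # JSON parsing fails -> StringBlobParser decodes it
print(parse(b"\x89PNG"))                               # not valid UTF-8 -> IdentityBlobParser returns the bytes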

View File

View File

@ -0,0 +1,7 @@
from pyinfra.parser.blob_parser import BlobParser
class IdentityBlobParser(BlobParser):
def parse(self, data: bytes):
return data

View File

@ -0,0 +1,21 @@
import json
from pyinfra.parser.blob_parser import BlobParser, ParsingError
from pyinfra.server.packing import string_to_bytes
class JsonBlobParser(BlobParser):
def parse(self, data: bytes):
try:
data = data.decode()
data = json.loads(data)
except (UnicodeDecodeError, json.JSONDecodeError, AttributeError) as err:
raise ParsingError from err
try:
data["data"] = string_to_bytes(data["data"])
except (KeyError, TypeError) as err:
raise ParsingError from err
return data

View File

@ -0,0 +1,9 @@
from pyinfra.parser.blob_parser import BlobParser, ParsingError
class StringBlobParser(BlobParser):
def parse(self, data: bytes):
try:
return data.decode()
except Exception as err:
raise ParsingError from err

View File

@ -0,0 +1,18 @@
class CachedPipelineFactory:
def __init__(self, base_url, pipeline_factory):
self.base_url = base_url
self.operation2pipeline = {}
self.pipeline_factory = pipeline_factory
def get_pipeline(self, operation: str):
pipeline = self.operation2pipeline.get(operation, None) or self.__register_pipeline(operation)
return pipeline
def __register_pipeline(self, operation):
endpoint = self.__make_endpoint(operation)
pipeline = self.pipeline_factory(endpoint)
self.operation2pipeline[operation] = pipeline
return pipeline
def __make_endpoint(self, operation):
return f"{self.base_url}/{operation}"

View File

@ -2,15 +2,15 @@ from pyinfra.queue.queue_manager.queue_manager import QueueManager
class Consumer:
def __init__(self, callback, queue_manager: QueueManager):
def __init__(self, visitor, queue_manager: QueueManager):
self.queue_manager = queue_manager
self.callback = callback
self.visitor = visitor
def consume_and_publish(self):
self.queue_manager.consume_and_publish(self.callback)
def consume_and_publish(self, n=None):
self.queue_manager.consume_and_publish(self.visitor, n=n)
def basic_consume_and_publish(self):
self.queue_manager.basic_consume_and_publish(self.callback)
self.queue_manager.basic_consume_and_publish(self.visitor)
def consume(self, **kwargs):
return self.queue_manager.consume(**kwargs)

View File

@ -1,18 +1,18 @@
import json
import logging
import time
from itertools import islice
import pika
from pyinfra.config import CONFIG
from pyinfra.exceptions import ProcessingFailure, DataLoadingFailure
from pyinfra.queue.queue_manager.queue_manager import QueueHandle, QueueManager
from pyinfra.visitor import QueueVisitor
logger = logging.getLogger("pika")
logger.setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
logger.setLevel(CONFIG.service.logging_level)
logger = logging.getLogger()
def monkey_patch_queue_handle(channel, queue) -> QueueHandle:
@ -83,7 +83,7 @@ class PikaQueueManager(QueueManager):
self.channel.queue_declare(input_queue, arguments=args, auto_delete=False, durable=True)
self.channel.queue_declare(output_queue, arguments=args, auto_delete=False, durable=True)
def republish(self, body, n_current_attempts, frame):
def republish(self, body: bytes, n_current_attempts, frame):
self.channel.basic_publish(
exchange="",
routing_key=self._input_queue,
@ -100,7 +100,7 @@ class PikaQueueManager(QueueManager):
logger.error(f"Adding to dead letter queue: {body}")
self.channel.basic_reject(delivery_tag=frame.delivery_tag, requeue=False)
def publish_response(self, message, callback, max_attempts=3):
def publish_response(self, message, visitor: QueueVisitor, max_attempts=3):
logger.debug(f"Processing {message}.")
@ -109,8 +109,15 @@ class PikaQueueManager(QueueManager):
n_attempts = get_n_previous_attempts(properties) + 1
try:
response = json.dumps(callback(json.loads(body)))
self.channel.basic_publish("", self._output_queue, response.encode())
response_messages = visitor(json.loads(body))
if isinstance(response_messages, dict):
response_messages = [response_messages]
for response_message in response_messages:
response_message = json.dumps(response_message).encode()
self.channel.basic_publish("", self._output_queue, response_message)
self.channel.basic_ack(frame.delivery_tag)
except (ProcessingFailure, DataLoadingFailure):
@ -124,20 +131,21 @@ class PikaQueueManager(QueueManager):
def pull_request(self):
return self.channel.basic_get(self._input_queue)
def consume(self, inactivity_timeout=None):
def consume(self, inactivity_timeout=None, n=None):
logger.debug("Consuming")
return self.channel.consume(self._input_queue, inactivity_timeout=inactivity_timeout)
gen = self.channel.consume(self._input_queue, inactivity_timeout=inactivity_timeout)
yield from islice(gen, n)
def consume_and_publish(self, visitor):
def consume_and_publish(self, visitor: QueueVisitor, n=None):
logger.info(f"Consuming with callback {visitor.callback.__name__}")
logger.info(f"Consuming input queue.")
for message in self.consume():
for message in self.consume(n=n):
self.publish_response(message, visitor)
def basic_consume_and_publish(self, visitor):
def basic_consume_and_publish(self, visitor: QueueVisitor):
logger.info(f"Basic consuming with callback {visitor.callback.__name__}")
logger.info(f"Basic consuming input queue.")
def callback(channel, frame, properties, body):
message = (frame, properties, body)
@ -150,6 +158,8 @@ class PikaQueueManager(QueueManager):
try:
self.channel.queue_purge(self._input_queue)
self.channel.queue_purge(self._output_queue)
assert self.input_queue.to_list() == []
assert self.output_queue.to_list() == []
except pika.exceptions.ChannelWrongStateError:
pass

View File

@ -43,7 +43,7 @@ class QueueManager(abc.ABC):
raise NotImplementedError
@abc.abstractmethod
def consume_and_publish(self, callback):
def consume_and_publish(self, callback, n=None):
raise NotImplementedError
@abc.abstractmethod

View File

View File

View File

@ -0,0 +1,37 @@
import logging
from collections import deque
from funcy import repeatedly, identity
from pyinfra.exceptions import NoBufferCapacity
from pyinfra.server.nothing import Nothing
logger = logging.getLogger(__name__)
def bufferize(fn, buffer_size=3, persist_fn=identity, null_value=None):
def buffered_fn(item):
if item is not Nothing:
buffer.append(persist_fn(item))
response_payload = fn(repeatedly(buffer.popleft, n_items_to_pop(buffer, item is Nothing)))
return response_payload or null_value
def buffer_full(current_buffer_size):
if current_buffer_size > buffer_size:
logger.warning(f"Overfull buffer. size: {current_buffer_size}; intended capacity: {buffer_size}")
return current_buffer_size == buffer_size
def n_items_to_pop(buffer, final):
current_buffer_size = len(buffer)
return (final or buffer_full(current_buffer_size)) * current_buffer_size
if not buffer_size > 0:
raise NoBufferCapacity("Buffer size must be greater than zero.")
buffer = deque()
return buffered_fn
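In other words, bufferize turns a batch function into an item-wise one: items accumulate until the buffer is full or the Nothing sentinel arrives, and only then is the wrapped function applied to the drained buffer. A small behavioural sketch, assuming the import paths used elsewhere in this change set; batch_sum and the "buffering" null value are made up.
from pyinfra.server.buffering.bufferize import bufferize
from pyinfra.server.nothing import Nothing

def batch_sum(items):
    # Receives an iterator over the drained buffer; empty while the buffer is still filling.
    items = list(items)
    return sum(items) if items else None

buffered_sum = bufferize(batch_sum, buffer_size=3, null_value="buffering")

print(buffered_sum(1))        # "buffering"  (buffer: [1])
print(buffered_sum(2))        # "buffering"  (buffer: [1, 2])
print(buffered_sum(3))        # 6            (buffer full -> drained and summed)
print(buffered_sum(4))        # "buffering"  (buffer: [4])
print(buffered_sum(Nothing))  # 4            (Nothing marks the end -> final flush)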

View File

@ -0,0 +1,24 @@
from collections import deque
from itertools import takewhile
from funcy import repeatedly
from pyinfra.server.nothing import is_not_nothing, Nothing
def stream_queue(queue):
yield from takewhile(is_not_nothing, repeatedly(queue.popleft))
class Queue:
def __init__(self):
self.__queue = deque()
def append(self, package) -> None:
self.__queue.append(package)
def popleft(self):
return self.__queue.popleft() if self.__queue else Nothing
def __bool__(self):
return bool(self.__queue)

View File

@ -0,0 +1,44 @@
from itertools import chain, takewhile
from typing import Iterable
from funcy import first, repeatedly, mapcat
from pyinfra.server.buffering.bufferize import bufferize
from pyinfra.server.nothing import Nothing, is_not_nothing
class FlatStreamBuffer:
"""Wraps a stream buffer and chains its output. Also flushes the stream buffer when applied to an iterable."""
def __init__(self, fn, buffer_size=3):
"""Function `fn` needs to be mappable and return an iterable; ideally `fn` returns a generator."""
self.stream_buffer = StreamBuffer(fn, buffer_size=buffer_size)
def __call__(self, items):
items = chain(items, [Nothing])
yield from mapcat(self.stream_buffer, items)
class StreamBuffer:
"""Puts a streaming function between an input and an output buffer."""
def __init__(self, fn, buffer_size=3):
"""Function `fn` needs to be mappable and return an iterable; ideally `fn` returns a generator."""
self.fn = bufferize(fn, buffer_size=buffer_size, null_value=[])
self.result_stream = chain([])
def __call__(self, item) -> Iterable:
self.push(item)
yield from takewhile(is_not_nothing, repeatedly(self.pop))
def push(self, item):
self.result_stream = chain(self.result_stream, self.compute(item))
def compute(self, item):
try:
yield from self.fn(item)
except TypeError as err:
raise TypeError("Function failed with type-error. Is it mappable?") from err
def pop(self):
return first(chain(self.result_stream, [Nothing]))
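A behavioural sketch of FlatStreamBuffer, assuming the import path used by pyinfra/server/server.py in this change set; the doubling generator below is a hypothetical streamable function.
from pyinfra.server.buffering.stream import FlatStreamBuffer

def double_all(items):
    # Called with the drained buffer (an iterable); yields one result per item.
    for item in items:
        yield item * 2

stream = FlatStreamBuffer(double_all, buffer_size=2)

# Items are processed in pairs; the final odd item is still flushed because
# FlatStreamBuffer appends the Nothing sentinel to the input stream.
print(list(stream([1, 2, 3, 4, 5])))  # [2, 4, 6, 8, 10]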

View File

@ -0,0 +1,16 @@
from funcy import rcompose, flatten
# TODO: remove the dispatcher component from the pipeline; it no longer actually dispatches
class ClientPipeline:
def __init__(self, packer, dispatcher, receiver, interpreter):
self.pipe = rcompose(
packer,
dispatcher,
receiver,
interpreter,
flatten, # each analysis call returns an iterable. Can be empty, singleton or multi item. Hence, flatten.
)
def __call__(self, *args, **kwargs):
yield from self.pipe(*args, **kwargs)

View File

@ -0,0 +1,27 @@
from itertools import tee
from typing import Iterable
def inspect(prefix="inspect", embed=False):
"""Can be used to inspect compositions of generator functions by placing inbetween two functions."""
def inner(x):
if isinstance(x, Iterable) and not isinstance(x, dict) and not isinstance(x, tuple):
x, y = tee(x)
y = list(y)
else:
y = x
l = f" {len(y)} items" if isinstance(y, list) else ""
print(f"{prefix}{l}:", y)
if embed:
import IPython
IPython.embed()
return x
return inner

View File

View File

@ -0,0 +1,30 @@
import abc
from typing import Iterable
from more_itertools import peekable
from pyinfra.server.nothing import Nothing
def has_next(peekable_iter):
return peekable_iter.peek(Nothing) is not Nothing
class Dispatcher:
def __call__(self, packages: Iterable[dict]):
yield from self.dispatch_methods(packages)
def dispatch_methods(self, packages):
packages = peekable(packages)
for package in packages:
method = self.patch if has_next(packages) else self.post
response = method(package)
yield response
@abc.abstractmethod
def patch(self, package):
raise NotImplementedError
@abc.abstractmethod
def post(self, package):
raise NotImplementedError
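The dispatch rule above sends every package except the last as PATCH and the final one as POST. A minimal sketch with a hypothetical recording subclass, assuming the import path used by the other dispatcher modules in this change set.
from pyinfra.server.dispatcher.dispatcher import Dispatcher

class RecordingDispatcher(Dispatcher):
    # Records which HTTP verb would have been used for each package.
    def patch(self, package):
        return ("PATCH", package)

    def post(self, package):
        return ("POST", package)

dispatcher = RecordingDispatcher()
print(list(dispatcher([{"page": 1}, {"page": 2}, {"page": 3}])))
# [('PATCH', {'page': 1}), ('PATCH', {'page': 2}), ('POST', {'page': 3})]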

View File

@ -0,0 +1,21 @@
from itertools import takewhile
from funcy import repeatedly, notnone
from pyinfra.server.dispatcher.dispatcher import Dispatcher
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
class QueuedStreamFunctionDispatcher(Dispatcher):
def __init__(self, queued_stream_function: QueuedStreamFunction):
self.queued_stream_function = queued_stream_function
def patch(self, package):
self.queued_stream_function.push(package)
# TODO: this is wonky and a result of the pipeline components' behaviour having shifted through previous
# refactorings. The analogous functionality for the REST pipeline lives in the interpreter. Correct this
# asymmetry!
yield from takewhile(notnone, repeatedly(self.queued_stream_function.pop))
def post(self, package):
yield from self.patch(package)

View File

@ -0,0 +1,14 @@
import requests
from pyinfra.server.dispatcher.dispatcher import Dispatcher
class RestDispatcher(Dispatcher):
def __init__(self, endpoint):
self.endpoint = endpoint
def patch(self, package):
return requests.patch(self.endpoint, json=package)
def post(self, package):
return requests.post(self.endpoint, json=package)

View File

View File

View File

@ -0,0 +1,8 @@
import abc
from typing import Iterable
class Interpreter(abc.ABC):
@abc.abstractmethod
def __call__(self, payloads: Iterable):
pass

View File

@ -0,0 +1,8 @@
from typing import Iterable
from pyinfra.server.interpreter.interpreter import Interpreter
class IdentityInterpreter(Interpreter):
def __call__(self, payloads: Iterable):
yield from payloads

View File

@ -0,0 +1,23 @@
from typing import Iterable
import requests
from funcy import takewhile, repeatedly, mapcat
from pyinfra.server.interpreter.interpreter import Interpreter
def stream_responses(endpoint):
def receive():
response = requests.get(endpoint)
return response
def more_is_coming(response):
return response.status_code == 206
response_stream = takewhile(more_is_coming, repeatedly(receive))
yield from response_stream
class RestPickupStreamer(Interpreter):
def __call__(self, payloads: Iterable):
yield from mapcat(stream_responses, payloads)

View File

@ -0,0 +1,39 @@
from functools import lru_cache
from funcy import identity
from prometheus_client import CollectorRegistry, Summary
from pyinfra.server.operation_dispatcher import OperationDispatcher
class OperationDispatcherMonitoringDecorator:
def __init__(self, operation_dispatcher: OperationDispatcher, naming_policy=identity):
self.operation_dispatcher = operation_dispatcher
self.operation2metric = {}
self.naming_policy = naming_policy
@property
@lru_cache(maxsize=None)
def registry(self):
return CollectorRegistry(auto_describe=True)
def make_summary_instance(self, op: str):
return Summary(f"{self.naming_policy(op)}_seconds", f"Time spent on {op}.", registry=self.registry)
def submit(self, operation, request):
return self.operation_dispatcher.submit(operation, request)
def pickup(self, operation):
with self.get_monitor(operation):
return self.operation_dispatcher.pickup(operation)
def get_monitor(self, operation):
monitor = self.operation2metric.get(operation, None) or self.register_operation(operation)
return monitor.time()
def register_operation(self, operation):
summary = self.make_summary_instance(operation)
self.operation2metric[operation] = summary
return summary

View File

@ -0,0 +1,17 @@
from itertools import chain
from typing import Iterable, Union, Tuple
from pyinfra.exceptions import UnexpectedItemType
def normalize(itr: Iterable[Union[Tuple, Iterable]]) -> Iterable[Tuple]:
return chain.from_iterable(map(normalize_item, normalize_item(itr)))
def normalize_item(itm: Union[Tuple, Iterable]) -> Iterable:
if isinstance(itm, tuple):
return [itm]
elif isinstance(itm, Iterable):
return itm
else:
raise UnexpectedItemType("Encountered an item that could not be normalized to a list.")
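normalize therefore accepts a mix of bare (data, metadata) tuples and iterables of such tuples and flattens them into one stream. A worked example, assuming the import path used by pyinfra/server/utils.py in this change set; the payloads are made up.
from pyinfra.server.normalization import normalize

single = (b"page-1", {"id": 1})                              # a bare (data, metadata) tuple
several = [(b"page-2", {"id": 2}), (b"page-3", {"id": 3})]   # an iterable of tuples

print(list(normalize([single, several])))
# [(b'page-1', {'id': 1}), (b'page-2', {'id': 2}), (b'page-3', {'id': 3})]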

View File

@ -0,0 +1,6 @@
class Nothing:
pass
def is_not_nothing(x):
return x is not Nothing

View File

@ -0,0 +1,33 @@
from itertools import starmap, tee
from typing import Dict
from funcy import juxt, zipdict, cat
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
from pyinfra.server.stream.rest import LazyRestProcessor
class OperationDispatcher:
def __init__(self, operation2function: Dict[str, QueuedStreamFunction]):
submit_suffixes, pickup_suffixes = zip(*map(juxt(submit_suffix, pickup_suffix), operation2function))
processors = starmap(LazyRestProcessor, zip(operation2function.values(), submit_suffixes, pickup_suffixes))
self.operation2processor = zipdict(submit_suffixes + pickup_suffixes, cat(tee(processors)))
@classmethod
@property
def pickup_suffix(cls):
return pickup_suffix("")
def submit(self, operation, request):
return self.operation2processor[operation].push(request)
def pickup(self, operation):
return self.operation2processor[operation].pop()
def submit_suffix(op: str):
return "" if not op else op
def pickup_suffix(op: str):
return "pickup" if not op else f"{op}_pickup"

View File

View File

@ -0,0 +1,8 @@
import abc
from typing import Iterable
class Packer(abc.ABC):
@abc.abstractmethod
def __call__(self, data: Iterable, metadata: Iterable):
pass

View File

@ -0,0 +1,14 @@
from itertools import starmap
from typing import Iterable
from pyinfra.server.packer.packer import Packer
def bundle(data: bytes, metadata: dict):
package = {"data": data, "metadata": metadata}
return package
class IdentityPacker(Packer):
def __call__(self, data: Iterable, metadata):
yield from starmap(bundle, zip(data, metadata))

View File

@ -0,0 +1,9 @@
from typing import Iterable
from pyinfra.server.packer.packer import Packer
from pyinfra.server.packing import pack_data_and_metadata_for_rest_transfer
class RestPacker(Packer):
def __call__(self, data: Iterable[bytes], metadata: Iterable[dict]):
yield from pack_data_and_metadata_for_rest_transfer(data, metadata)

pyinfra/server/packing.py (Normal file, 34 lines)
View File

@ -0,0 +1,34 @@
import base64
from _operator import itemgetter
from itertools import starmap
from typing import Iterable
from funcy import compose
from pyinfra.utils.func import starlift, lift
def pack_data_and_metadata_for_rest_transfer(data: Iterable, metadata: Iterable):
yield from starmap(pack, zip(data, metadata))
def unpack_fn_pack(fn):
return compose(starlift(pack), fn, lift(unpack))
def pack(data: bytes, metadata: dict):
package = {"data": bytes_to_string(data), "metadata": metadata}
return package
def unpack(package):
data, metadata = itemgetter("data", "metadata")(package)
return string_to_bytes(data), metadata
def bytes_to_string(data: bytes) -> str:
return base64.b64encode(data).decode()
def string_to_bytes(data: str) -> bytes:
return base64.b64decode(data.encode())
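A round-trip sketch for these helpers; the payload and fileId are made up. unpack_fn_pack wraps a stream function so that it sees raw (bytes, dict) tuples while its callers keep exchanging base64-encoded packages.
from pyinfra.server.packing import pack, unpack, unpack_fn_pack

package = pack(b"hello", {"fileId": "f-1"})
print(package)          # {'data': 'aGVsbG8=', 'metadata': {'fileId': 'f-1'}}
print(unpack(package))  # (b'hello', {'fileId': 'f-1'})

def upper_case(items):
    # Operates on unpacked (data, metadata) tuples.
    for data, metadata in items:
        yield data.upper(), metadata

wrapped = unpack_fn_pack(upper_case)
print(list(wrapped([package])))  # [{'data': 'SEVMTE8=', 'metadata': {'fileId': 'f-1'}}]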

View File

View File

@ -0,0 +1,8 @@
import abc
from typing import Iterable
class Receiver(abc.ABC):
@abc.abstractmethod
def __call__(self, package: Iterable):
pass

View File

@ -0,0 +1,11 @@
from typing import Iterable
from pyinfra.server.receiver.receiver import Receiver
from funcy import notnone
class QueuedStreamFunctionReceiver(Receiver):
def __call__(self, responses: Iterable):
for response in filter(notnone, responses):
yield response

View File

@ -0,0 +1,16 @@
from typing import Iterable
import requests
from funcy import chunks, flatten
from pyinfra.server.receiver.receiver import Receiver
class RestReceiver(Receiver):
def __init__(self, chunk_size=3):
self.chunk_size = chunk_size
def __call__(self, responses: Iterable[requests.Response]):
for response in flatten(chunks(self.chunk_size, responses)):
response.raise_for_status()
yield response.json()

pyinfra/server/server.py (Normal file, 100 lines)
View File

@ -0,0 +1,100 @@
from functools import singledispatch
from typing import Dict, Callable, Union
from flask import Flask, jsonify, request
from prometheus_client import generate_latest
from pyinfra.config import CONFIG
from pyinfra.server.buffering.stream import FlatStreamBuffer
from pyinfra.server.monitoring import OperationDispatcherMonitoringDecorator
from pyinfra.server.operation_dispatcher import OperationDispatcher
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
@singledispatch
def set_up_processing_server(arg: Union[dict, Callable], buffer_size=1):
"""Produces a processing server given a streamable function or a mapping from operations to streamable functions.
Streamable functions are constructed by calling pyinfra.server.utils.make_streamable_and_wrap_in_packing_logic on a
function taking a tuple of data and metadata and also returning a tuple or yielding tuples of data and metadata.
If the function doesn't produce data, data should be an empty byte string.
If the function doesn't produce metadata, metadata should be an empty dictionary.
Args:
arg: a streamable function, or a mapping from operation names (str) to streamable functions
buffer_size: If your function operates on batches, this parameter controls how many items are aggregated before
your function is applied.
TODO: buffer_size has to be controllable on a per-function basis.
Returns:
Processing server: flask app
"""
pass
@set_up_processing_server.register
def _(operation2stream_fn: dict, buffer_size=1):
return __stream_fn_to_processing_server(operation2stream_fn, buffer_size)
@set_up_processing_server.register
def _(stream_fn: object, buffer_size=1):
operation2stream_fn = {None: stream_fn}
return __stream_fn_to_processing_server(operation2stream_fn, buffer_size)
def __stream_fn_to_processing_server(operation2stream_fn: dict, buffer_size):
operation2stream_fn = {
op: QueuedStreamFunction(FlatStreamBuffer(fn, buffer_size)) for op, fn in operation2stream_fn.items()
}
return __set_up_processing_server(operation2stream_fn)
def __set_up_processing_server(operation2function: Dict[str, QueuedStreamFunction]):
app = Flask(__name__)
dispatcher = OperationDispatcherMonitoringDecorator(
OperationDispatcher(operation2function),
naming_policy=naming_policy,
)
def ok():
resp = jsonify("OK")
resp.status_code = 200
return resp
@app.route("/ready", methods=["GET"])
def ready():
return ok()
@app.route("/health", methods=["GET"])
def healthy():
return ok()
@app.route("/prometheus", methods=["GET"])
def prometheus():
return generate_latest(registry=dispatcher.registry)
@app.route("/<operation>", methods=["POST", "PATCH"])
def submit(operation):
return dispatcher.submit(operation, request)
@app.route("/", methods=["POST", "PATCH"])
def submit_default():
return dispatcher.submit("", request)
@app.route("/<operation>", methods=["GET"])
def pickup(operation):
return dispatcher.pickup(operation)
return app
def naming_policy(op_name: str):
pop_suffix = OperationDispatcher.pickup_suffix
prefix = f"redactmanager_{CONFIG.service.name}"
op_display_name = op_name.replace(f"_{pop_suffix}", "") if op_name != pop_suffix else "default"
complete_display_name = f"{prefix}_{op_display_name}"
return complete_display_name
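A hedged end-to-end usage sketch of the construction the docstring describes, assuming the module paths pyinfra.server.server and pyinfra.server.utils from this change set; the upper-casing function and the "upper" operation name are made up for illustration.
from pyinfra.server.server import set_up_processing_server
from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic

def to_upper(data: bytes, metadata: dict):
    # One (data, metadata) pair in, one (data, metadata) pair out.
    return data.upper(), {**metadata, "upper_cased": True}

stream_fn = make_streamable_and_wrap_in_packing_logic(to_upper, batched=False)

# A mapping registers one streamable function per operation; passing a single
# function instead serves it as the default ("/") operation.
app = set_up_processing_server({"upper": stream_fn}, buffer_size=1)

if __name__ == "__main__":
    app.run(port=5000)  # Flask development server, for local experimentation only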

View File

View File

@ -0,0 +1,21 @@
from funcy import first
from pyinfra.server.buffering.queue import stream_queue, Queue
class QueuedStreamFunction:
def __init__(self, stream_function):
"""Combines a stream function with a queue.
Args:
stream_function: Needs to operate on iterables.
"""
self.queue = Queue()
self.stream_function = stream_function
def push(self, item):
self.queue.append(item)
def pop(self):
items = stream_queue(self.queue)
return first(self.stream_function(items))
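A small push/pop sketch, assuming the import path used elsewhere in this change set; the doubling generator is hypothetical. Each pop drains the queue into the stream function and returns only the first available result, so remaining results surface on later pops.
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction

def doubled(items):
    for item in items:
        yield item * 2

qsf = QueuedStreamFunction(doubled)
qsf.push(1)
qsf.push(2)

print(qsf.pop())  # 2
print(qsf.pop())  # 4     (produced from the item still sitting in the queue)
print(qsf.pop())  # None  (queue exhausted)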

View File

@ -0,0 +1,51 @@
import logging
from flask import jsonify
from funcy import drop
from pyinfra.server.nothing import Nothing
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
logger = logging.getLogger(__name__)
class LazyRestProcessor:
def __init__(self, queued_stream_function: QueuedStreamFunction, submit_suffix="submit", pickup_suffix="pickup"):
self.submit_suffix = submit_suffix
self.pickup_suffix = pickup_suffix
self.queued_stream_function = queued_stream_function
def push(self, request):
self.queued_stream_function.push(request.json)
return jsonify(replace_suffix(request.base_url, self.submit_suffix, self.pickup_suffix))
def pop(self):
result = self.queued_stream_function.pop() or Nothing
if not valid(result):
logger.error(f"Received invalid result: {result}")
result = Nothing
if result is Nothing:
logger.info("Analysis completed successfully.")
resp = jsonify("No more items left")
resp.status_code = 204
else:
logger.debug("Partial analysis completed.")
resp = jsonify(result)
resp.status_code = 206
return resp
def valid(result):
return isinstance(result, dict) or result is Nothing
def replace_suffix(strn, suf, repl):
return remove_last_n(strn, len(suf)) + repl
def remove_last_n(strn, n):
return "".join(reversed(list(drop(n, reversed(strn)))))

pyinfra/server/utils.py (Normal file, 16 lines)
View File

@ -0,0 +1,16 @@
from funcy import compose, identity
from pyinfra.server.normalization import normalize
from pyinfra.server.packing import unpack_fn_pack
from pyinfra.utils.func import starlift
def make_streamable_and_wrap_in_packing_logic(fn, batched):
fn = make_streamable(fn, batched)
fn = unpack_fn_pack(fn)
return fn
def make_streamable(fn, batched):
# FIXME: something broken with batched == True
return compose(normalize, (identity if batched else starlift)(fn))

View File

@ -30,5 +30,5 @@ class StorageAdapter(ABC):
raise NotImplementedError
@abstractmethod
def get_all_object_names(self, bucket_name):
def get_all_object_names(self, bucket_name, prefix=None):
raise NotImplementedError

View File

@ -1,5 +1,4 @@
import logging
from itertools import repeat
from operator import attrgetter
from azure.storage.blob import ContainerClient, BlobServiceClient
@ -20,15 +19,15 @@ class AzureStorageAdapter(StorageAdapter):
container_client = self.__client.get_container_client(bucket_name)
return container_client.exists()
def make_bucket(self, bucket_name):
container_client = self.__client.get_container_client(bucket_name)
container_client if container_client.exists() else self.__client.create_container(bucket_name)
def __provide_container_client(self, bucket_name) -> ContainerClient:
self.make_bucket(bucket_name)
container_client = self.__client.get_container_client(bucket_name)
return container_client
def make_bucket(self, bucket_name):
container_client = self.__client.get_container_client(bucket_name)
container_client if container_client.exists() else self.__client.create_container(bucket_name)
def put_object(self, bucket_name, object_name, data):
logger.debug(f"Uploading '{object_name}'...")
container_client = self.__provide_container_client(bucket_name)
@ -59,7 +58,7 @@ class AzureStorageAdapter(StorageAdapter):
blobs = container_client.list_blobs()
container_client.delete_blobs(*blobs)
def get_all_object_names(self, bucket_name):
def get_all_object_names(self, bucket_name, prefix=None):
container_client = self.__provide_container_client(bucket_name)
blobs = container_client.list_blobs()
return zip(repeat(bucket_name), map(attrgetter("name"), blobs))
blobs = container_client.list_blobs(name_starts_with=prefix)
return map(attrgetter("name"), blobs)

View File

@ -1,6 +1,6 @@
import io
from itertools import repeat
import logging
from itertools import repeat
from operator import attrgetter
from minio import Minio
@ -53,6 +53,6 @@ class S3StorageAdapter(StorageAdapter):
for obj in objects:
self.__client.remove_object(bucket_name, obj.object_name)
def get_all_object_names(self, bucket_name):
objs = self.__client.list_objects(bucket_name, recursive=True)
return zip(repeat(bucket_name), map(attrgetter("object_name"), objs))
def get_all_object_names(self, bucket_name, prefix=None):
objs = self.__client.list_objects(bucket_name, recursive=True, prefix=prefix)
return map(attrgetter("object_name"), objs)

View File

@ -1,10 +1,10 @@
import logging
from retry import retry
from pyinfra.config import CONFIG
from pyinfra.exceptions import DataLoadingFailure
from pyinfra.storage.adapters.adapter import StorageAdapter
from pyinfra.utils.retry import retry
logger = logging.getLogger(__name__)
logger.setLevel(CONFIG.service.logging_level)
@ -26,7 +26,7 @@ class Storage:
def get_object(self, bucket_name, object_name):
return self.__get_object(bucket_name, object_name)
@retry(DataLoadingFailure, tries=3, delay=5, jitter=(1, 3))
@retry(DataLoadingFailure)
def __get_object(self, bucket_name, object_name):
try:
return self.__adapter.get_object(bucket_name, object_name)
@ -40,5 +40,5 @@ class Storage:
def clear_bucket(self, bucket_name):
return self.__adapter.clear_bucket(bucket_name)
def get_all_object_names(self, bucket_name):
return self.__adapter.get_all_object_names(bucket_name)
def get_all_object_names(self, bucket_name, prefix=None):
return self.__adapter.get_all_object_names(bucket_name, prefix=prefix)

pyinfra/utils/encoding.py (Normal file, 14 lines)
View File

@ -0,0 +1,14 @@
import gzip
import json
def pack_analysis_payload(analysis_payload):
return compress(json.dumps(analysis_payload).encode())
def compress(data: bytes):
return gzip.compress(data)
def decompress(data: bytes):
return gzip.decompress(data)

pyinfra/utils/func.py (Normal file, 47 lines)
View File

@ -0,0 +1,47 @@
from itertools import starmap, tee
from funcy import curry, compose, filter
def lift(fn):
return curry(map)(fn)
def llift(fn):
return compose(list, lift(fn))
def starlift(fn):
return curry(starmap)(fn)
def lstarlift(fn):
return compose(list, starlift(fn))
def parallel(*fs):
return lambda *args: (f(a) for f, a in zip(fs, args))
def star(f):
return lambda x: f(*x)
def duplicate_stream_and_apply(f1, f2):
return compose(star(parallel(f1, f2)), tee)
def foreach(fn, iterable):
for itm in iterable:
fn(itm)
def flift(pred):
return curry(filter)(pred)
def parallel_map(f1, f2):
"""Applies functions to a stream in parallel and yields a stream of tuples:
parallel_map :: a -> b, a -> c -> [a] -> [(b, c)]
"""
return compose(star(zip), duplicate_stream_and_apply(f1, f2))
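A couple of worked examples for these combinators, assuming the module path pyinfra/utils/func.py added here. lift turns an item-level function into a stream function; parallel_map runs two such stream functions over copies of one input stream and zips their results.
from pyinfra.utils.func import lift, parallel_map

doubles = lift(lambda x: x * 2)   # stream function: maps the lambda over an iterable
strings = lift(str)

pairs = parallel_map(doubles, strings)
print(list(pairs([1, 2, 3])))     # [(2, '1'), (4, '2'), (6, '3')]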

pyinfra/utils/retry.py (Normal file, 15 lines)
View File

@ -0,0 +1,15 @@
from pyinfra.config import CONFIG
from retry import retry as _retry
def retry(exception):
def decorator(fn):
@_retry(exception, tries=CONFIG.retry.tries, delay=CONFIG.retry.delay, jitter=tuple(CONFIG.retry.jitter))
def inner(*args, **kwargs):
return fn(*args, **kwargs)
return inner
return decorator
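Usage stays the same as with the plain retry decorator, but tries, delay and jitter now come from CONFIG.retry instead of being repeated at every call site. A short sketch, assuming the import paths from this change set; the flaky function is made up.
from pyinfra.exceptions import DataLoadingFailure
from pyinfra.utils.retry import retry

@retry(DataLoadingFailure)
def download_or_fail(object_name):
    # Placeholder for a storage call that may raise DataLoadingFailure;
    # retried with the tries/delay/jitter configured under CONFIG.retry.
    raise DataLoadingFailure(f"could not load {object_name}")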

View File

@ -1,91 +0,0 @@
import abc
import gzip
import json
import logging
from operator import itemgetter
from typing import Callable
from pyinfra.config import CONFIG, parse_disjunction_string
from pyinfra.exceptions import DataLoadingFailure
from pyinfra.storage.storage import Storage
def get_object_name(body):
dossier_id, file_id, target_file_extension = itemgetter("dossierId", "fileId", "targetFileExtension")(body)
object_name = f"{dossier_id}/{file_id}.{target_file_extension}"
return object_name
def get_response_object_name(body):
dossier_id, file_id, response_file_extension = itemgetter("dossierId", "fileId", "responseFileExtension")(body)
object_name = f"{dossier_id}/{file_id}.{response_file_extension}"
return object_name
def get_object_descriptor(body):
return {"bucket_name": parse_disjunction_string(CONFIG.storage.bucket), "object_name": get_object_name(body)}
def get_response_object_descriptor(body):
return {
"bucket_name": parse_disjunction_string(CONFIG.storage.bucket),
"object_name": get_response_object_name(body),
}
class ResponseStrategy(abc.ABC):
@abc.abstractmethod
def handle_response(self, body):
pass
def __call__(self, body):
return self.handle_response(body)
class StorageStrategy(ResponseStrategy):
def __init__(self, storage):
self.storage = storage
def handle_response(self, body):
self.storage.put_object(**get_response_object_descriptor(body), data=gzip.compress(json.dumps(body).encode()))
body.pop("data")
return body
class ForwardingStrategy(ResponseStrategy):
def handle_response(self, body):
return body
class QueueVisitor:
def __init__(self, storage: Storage, callback: Callable, response_strategy):
self.storage = storage
self.callback = callback
self.response_strategy = response_strategy
def load_data(self, body):
def download():
logging.debug(f"Downloading {object_descriptor}...")
data = self.storage.get_object(**object_descriptor)
logging.debug(f"Downloaded {object_descriptor}.")
return data
object_descriptor = get_object_descriptor(body)
try:
return gzip.decompress(download())
except Exception as err:
logging.warning(f"Loading data from storage failed for {object_descriptor}.")
raise DataLoadingFailure from err
def process_data(self, data, body):
return self.callback({**body, "data": data})
def load_and_process(self, body):
data = self.process_data(self.load_data(body), body)
result_body = {**body, "data": data}
return result_body
def __call__(self, body):
result_body = self.load_and_process(body)
return self.response_strategy(result_body)

View File

@ -0,0 +1 @@
from .visitor import QueueVisitor

View File

@ -0,0 +1,58 @@
import logging
from functools import partial
from funcy import compose
from pyinfra.file_descriptor_manager import FileDescriptorManager
from pyinfra.storage.storage import Storage
from pyinfra.utils.encoding import decompress
from pyinfra.utils.func import flift
logger = logging.getLogger(__name__)
class Downloader:
def __init__(self, storage: Storage, bucket_name, file_descriptor_manager: FileDescriptorManager):
self.storage = storage
self.bucket_name = bucket_name
self.file_descriptor_manager = file_descriptor_manager
def __call__(self, queue_item_body):
return self.download(queue_item_body)
def download(self, queue_item_body):
names_of_relevant_objects = self.get_names_of_objects_by_pattern(queue_item_body)
objects = self.download_and_decompress_object(names_of_relevant_objects)
return objects
def get_names_of_objects_by_pattern(self, queue_item_body):
logger.debug(f"Filtering objects in bucket {self.bucket_name} by pattern...")
names_of_relevant_objects = compose(
list,
self.get_pattern_filter(queue_item_body),
self.get_names_of_all_associated_objects,
)(queue_item_body)
logger.debug(f"Found {len(names_of_relevant_objects)} objects matching filter.")
return names_of_relevant_objects
def download_and_decompress_object(self, object_names):
download = partial(self.storage.get_object, self.bucket_name)
return map(compose(decompress, download), object_names)
def get_names_of_all_associated_objects(self, queue_item_body):
prefix = self.file_descriptor_manager.get_path_prefix(queue_item_body)
# TODO: performance tests for the following situations:
# 1) dossier with very many files
# 2) prefix matches very many files, independent of dossier cardinality
yield from self.storage.get_all_object_names(self.bucket_name, prefix=prefix)
def get_pattern_filter(self, queue_item_body):
file_pattern = self.file_descriptor_manager.build_input_matcher(queue_item_body)
logger.debug(f"Filtering pattern: {file_pattern if len(file_pattern) <= 120 else (file_pattern[:120]+'...')}")
matches_pattern = flift(file_pattern)
return matches_pattern

View File

@ -0,0 +1,14 @@
import abc
from funcy import identity
from pyinfra.utils.func import lift
class ResponseFormatter(abc.ABC):
def __call__(self, message):
return (identity if isinstance(message, dict) else lift)(self.format)(message)
@abc.abstractmethod
def format(self, message):
pass

View File

@ -0,0 +1,13 @@
from funcy import first, omit
from pyinfra.visitor.response_formatter.formatter import ResponseFormatter
class DefaultResponseFormatter(ResponseFormatter):
"""
TODO: Extend by using enums throughout the codebase instead of strings.
See the enum formatter in the image-prediction service for reference.
"""
def format(self, message):
return {**omit(message, ["response_files"]), "responseFile": first(message["response_files"])}

View File

@ -0,0 +1,6 @@
from pyinfra.visitor.response_formatter.formatter import ResponseFormatter
class IdentityResponseFormatter(ResponseFormatter):
def format(self, message):
return message

View File

View File

@ -0,0 +1,14 @@
import abc
class BlobParsingStrategy(abc.ABC):
@abc.abstractmethod
def parse(self, data: bytes):
pass
@abc.abstractmethod
def parse_and_wrap(self, data: bytes):
pass
def __call__(self, data: bytes):
return self.parse_and_wrap(data)

View File

@ -0,0 +1,20 @@
from typing import Union
from pyinfra.parser.parser_composer import EitherParserComposer
from pyinfra.parser.parsers.identity import IdentityBlobParser
from pyinfra.parser.parsers.json import JsonBlobParser
from pyinfra.parser.parsers.string import StringBlobParser
from pyinfra.visitor.strategies.blob_parsing.blob_parsing import BlobParsingStrategy
# TODO: Each analysis service should specify a custom parsing strategy for the type of data it expects to find
# in storage. This class is only a temporary trial-and-error-with-fallback solution.
class DynamicParsingStrategy(BlobParsingStrategy):
def __init__(self):
self.parser = EitherParserComposer(JsonBlobParser(), StringBlobParser(), IdentityBlobParser())
def parse(self, data: bytes) -> Union[bytes, dict]:
return self.parser(data)
def parse_and_wrap(self, data):
return self.parse(data)

View File

@ -0,0 +1,92 @@
import abc
from collections import deque
from typing import Callable
from funcy import omit, filter, first, lpluck, identity
from more_itertools import peekable
from pyinfra.file_descriptor_manager import FileDescriptorManager
from pyinfra.server.nothing import Nothing, is_not_nothing
from pyinfra.utils.encoding import pack_analysis_payload
from pyinfra.visitor.strategies.response.response import ResponseStrategy
class UploadFormatter(abc.ABC):
@abc.abstractmethod
def format(self, items):
raise NotImplementedError
def __call__(self, items):
return self.format(items)
class ProjectingUploadFormatter(UploadFormatter):
def format(self, items):
head = first(items)
if head["data"]:
assert len(items) == 1
return head
else:
items = lpluck("metadata", items)
return items
class AggregationStorageStrategy(ResponseStrategy):
def __init__(
self,
storage,
file_descriptor_manager: FileDescriptorManager,
merger: Callable = list,
upload_formatter: UploadFormatter = identity,
):
self.storage = storage
self.file_descriptor_manager = file_descriptor_manager
self.merger = merger
self.upload_formatter = upload_formatter
self.buffer = deque()
self.response_files = deque()
def handle_response(self, analysis_response, final=False):
def upload_or_aggregate(analysis_payload):
request_metadata = omit(analysis_response, ["analysis_payloads"])
return self.upload_or_aggregate(analysis_payload, request_metadata, last=not analysis_payloads.peek(False))
analysis_payloads = peekable(analysis_response["analysis_payloads"])
yield from filter(is_not_nothing, map(upload_or_aggregate, analysis_payloads))
def upload_or_aggregate(self, analysis_payload, request_metadata, last=False):
"""analysis_payload : {data: ..., metadata: ...}"""
storage_upload_info = self.file_descriptor_manager.build_storage_upload_info(analysis_payload, request_metadata)
object_descriptor = self.file_descriptor_manager.get_output_object_descriptor(storage_upload_info)
self.add_analysis_payload_to_buffer(analysis_payload)
if analysis_payload["data"] or last:
self.upload_aggregated_items(object_descriptor)
self.response_files.append(object_descriptor["object_name"])
return self.build_response_message(storage_upload_info) if last else Nothing
def add_analysis_payload_to_buffer(self, analysis_payload):
self.buffer.append({**analysis_payload, "metadata": omit(analysis_payload["metadata"], ["id"])})
def upload_aggregated_items(self, object_descriptor):
items_to_upload = self.upload_formatter(self.merge_queue_items())
self.upload_item(items_to_upload, object_descriptor)
def build_response_message(self, storage_upload_info):
response_files = [*self.response_files]
self.response_files.clear()
return {**storage_upload_info, "response_files": response_files}
def upload_item(self, analysis_payload, object_descriptor):
self.storage.put_object(**object_descriptor, data=pack_analysis_payload(analysis_payload))
def merge_queue_items(self):
merged_buffer_content = self.merger(self.buffer)
self.buffer.clear()
return merged_buffer_content

View File

@ -0,0 +1,6 @@
from pyinfra.visitor.strategies.response.response import ResponseStrategy
class ForwardingStrategy(ResponseStrategy):
def handle_response(self, analysis_response):
return analysis_response

View File

@ -0,0 +1,10 @@
import abc
class ResponseStrategy(abc.ABC):
@abc.abstractmethod
def handle_response(self, analysis_response: dict):
pass
def __call__(self, analysis_response: dict):
return self.handle_response(analysis_response)

View File

@ -0,0 +1,33 @@
import json
from operator import itemgetter
from pyinfra.config import parse_disjunction_string, CONFIG
from pyinfra.utils.encoding import compress
from pyinfra.visitor.strategies.response.response import ResponseStrategy
class StorageStrategy(ResponseStrategy):
def __init__(self, storage, response_file_extension="out"):
self.storage = storage
self.response_file_extension = response_file_extension
def handle_response(self, body: dict):
response_object_descriptor = self.get_response_object_descriptor(body)
self.storage.put_object(**response_object_descriptor, data=compress(json.dumps(body).encode()))
body.pop("analysis_payloads")
body["response_files"] = [response_object_descriptor["object_name"]]
return body
def get_response_object_descriptor(self, body):
return {
"bucket_name": parse_disjunction_string(CONFIG.storage.bucket),
"object_name": self.get_response_object_name(body),
}
def get_response_object_name(self, body):
dossier_id, file_id = itemgetter("dossierId", "fileId")(body)
object_name = f"{dossier_id}/{file_id}/.{self.response_file_extension}"
return object_name

pyinfra/visitor/utils.py (Normal file, 51 lines)
View File

@ -0,0 +1,51 @@
import logging
from typing import Dict
from pyinfra.exceptions import InvalidStorageItemFormat
from pyinfra.server.packing import string_to_bytes
logger = logging.getLogger()
def build_file_path(storage_upload_info, folder):
return f"{storage_upload_info['fileId']}" + (f"/{folder}" if folder else "")
def standardize(storage_item) -> Dict:
"""Storage items can be a blob or a blob with metadata. Standardizes to the latter.
Cases:
1) backend upload: data as bytes
2) Some Python service's upload: data as the bytes of a JSON string "{'data': <str>, 'metadata': <dict>}",
where the value of key 'data' was encoded with bytes_to_string(...)
Returns:
{"data": bytes, "metadata": dict}
"""
def is_blob_without_metadata(storage_item):
return isinstance(storage_item, bytes)
def is_blob_with_metadata(storage_item: Dict):
return isinstance(storage_item, dict)
if is_blob_without_metadata(storage_item):
return wrap(storage_item)
elif is_blob_with_metadata(storage_item):
validate(storage_item)
return storage_item
else: # Fallback / used for testing with simple data
logger.warning("Encountered storage data in unexpected format.")
assert isinstance(storage_item, str)
return wrap(string_to_bytes(storage_item))
def wrap(data):
return {"data": data, "metadata": {}}
def validate(storage_item):
if not ("data" in storage_item and "metadata" in storage_item):
raise InvalidStorageItemFormat(f"Expected a mapping with keys 'data' and 'metadata', got {storage_item}.")
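A worked example of the cases from the docstring, assuming the module paths from this change set; the payloads are made up.
from pyinfra.server.packing import bytes_to_string
from pyinfra.visitor.utils import standardize

# Case 1: a bare blob (e.g. a backend upload) is wrapped with empty metadata.
print(standardize(b"raw bytes"))
# {'data': b'raw bytes', 'metadata': {}}

# Case 2: a dict that already carries data and metadata is validated and passed through unchanged.
print(standardize({"data": b"raw bytes", "metadata": {"page": 1}}))
# {'data': b'raw bytes', 'metadata': {'page': 1}}

# Fallback: a plain base64 string (as used in simple tests) is decoded and wrapped, with a warning logged.
print(standardize(bytes_to_string(b"raw bytes")))
# {'data': b'raw bytes', 'metadata': {}}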

View File

@ -0,0 +1,84 @@
from typing import Callable
from funcy import lflatten, compose, itervalues, lfilter
from pyinfra.utils.func import lift
from pyinfra.visitor.response_formatter.formatter import ResponseFormatter
from pyinfra.visitor.response_formatter.formatters.identity import IdentityResponseFormatter
from pyinfra.visitor.strategies.blob_parsing.blob_parsing import BlobParsingStrategy
from pyinfra.visitor.strategies.blob_parsing.dynamic import DynamicParsingStrategy
from pyinfra.visitor.strategies.response.response import ResponseStrategy
from pyinfra.visitor.utils import standardize
class QueueVisitor:
def __init__(
self,
callback: Callable,
data_loader: Callable,
response_strategy: ResponseStrategy,
parsing_strategy: BlobParsingStrategy = None,
response_formatter: ResponseFormatter = None,
):
"""Processes queue messages with a given callback.
Args:
callback: callback to apply
data_loader: loads the data specified in the message and passes it to the callback
parsing_strategy: behaviour for interpreting loaded items
response_strategy: behaviour for response production
TODO: merge all dependencies into a single pipeline like: getter -> parser -> processor -> formatter -> putter
Returns:
depends on response strategy
"""
self.callback = callback
self.data_loader = data_loader
self.response_strategy = response_strategy
self.parsing_strategy = parsing_strategy or DynamicParsingStrategy()
self.response_formatter = response_formatter or IdentityResponseFormatter()
def __call__(self, queue_item_body):
analysis_response = compose(
self.response_formatter,
self.response_strategy,
self.load_items_from_storage_and_process_with_callback,
)(queue_item_body)
return analysis_response
def load_items_from_storage_and_process_with_callback(self, queue_item_body):
"""Bundles the result from processing a storage item with the body of the corresponding queue item."""
callback_results = compose(
self.remove_empty_results,
lflatten,
lift(self.get_item_processor(queue_item_body)),
self.load_data,
)(queue_item_body)
return {"analysis_payloads": callback_results, **queue_item_body}
def load_data(self, queue_item_body):
data = compose(
lift(standardize),
lift(self.parsing_strategy),
self.data_loader,
)(queue_item_body)
return data
def get_item_processor(self, queue_item_body):
def process_storage_item(storage_item):
analysis_input = {**storage_item, **queue_item_body}
return self.process_storage_item(analysis_input)
return process_storage_item
@staticmethod
def remove_empty_results(results):
return lfilter(compose(any, itervalues), results)
def process_storage_item(self, data_metadata_pack):
return self.callback(data_metadata_pack)

View File

@ -13,3 +13,9 @@ tqdm==4.62.3
pytest~=7.0.1
funcy==1.17
fpdf==1.7.2
PyMuPDF==1.19.6
more-itertools==8.12.0
numpy==1.22.3
Pillow==9.0.1
prometheus-client==0.13.1
frozendict==2.3.2

View File

@ -1,5 +1,4 @@
import argparse
import gzip
import os
from pathlib import Path
@ -7,6 +6,7 @@ from tqdm import tqdm
from pyinfra.config import CONFIG, parse_disjunction_string
from pyinfra.storage.storages import get_s3_storage
from pyinfra.utils.encoding import compress
def parse_args():
@ -31,7 +31,7 @@ def combine_dossier_id_and_file_id_and_extension(dossier_id, file_id, extension)
def upload_compressed_response(storage, bucket_name, dossier_id, file_id, result) -> None:
data = gzip.compress(result.encode())
data = compress(result.encode())
path_gz = combine_dossier_id_and_file_id_and_extension(dossier_id, file_id, CONFIG.service.response.extension)
storage.put_object(bucket_name, path_gz, data)
@ -44,7 +44,7 @@ def add_file_compressed(storage, bucket_name, dossier_id, path) -> None:
path_gz = combine_dossier_id_and_file_id_and_extension(dossier_id, Path(path).stem, suffix_gz)
with open(path, "rb") as f:
data = gzip.compress(f.read())
data = compress(f.read())
storage.put_object(bucket_name, path_gz, data)

View File

@ -9,8 +9,13 @@ from pyinfra.storage.storages import get_s3_storage
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--bucket_name", "-b", required=True)
parser.add_argument("--analysis_container", "-a", choices=["detr", "ner", "image", "dl_error"], required=True)
parser.add_argument("--bucket_name", "-b")
parser.add_argument(
"--analysis_container",
"-a",
choices=["detr", "ner", "image", "conversion", "extraction", "dl_error", "table_parsing"],
required=True,
)
args = parser.parse_args()
return args
@ -43,20 +48,40 @@ def make_connection() -> pika.BlockingConnection:
return connection
def build_message_bodies(analyse_container_type, bucket_name):
def build_message_bodies(analysis_container, bucket_name):
def update_message(message_dict):
if analyse_container_type == "detr" or analyse_container_type == "image":
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz"})
if analyse_container_type == "dl_error":
if analysis_container == "detr" or analysis_container == "image":
message_dict.update({"pages": []})
if analysis_container == "conversion":
message_dict.update(
{
"targetFileExtension": "ORIGIN.pdf.gz",
"responseFileExtension": "json.gz",
"operation": "conversion",
"pages": [1, 2, 3],
}
)
if analysis_container == "table_parsing":
message_dict.update(
{
"operation": "table_parsing",
"pages": [1, 2, 3],
}
)
if analysis_container == "extraction":
message_dict.update(
{"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "json.gz", "operation": "extraction"}
)
if analysis_container == "dl_error":
message_dict.update({"targetFileExtension": "no_such_file", "responseFileExtension": "IMAGE_INFO.json.gz"})
if analyse_container_type == "ner":
if analysis_container == "ner":
message_dict.update(
{"targetFileExtension": "TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz"}
)
return message_dict
storage = get_s3_storage()
for bucket_name, pdf_name in storage.get_all_object_names(bucket_name):
for pdf_name in storage.get_all_object_names(bucket_name):
if "pdf" not in pdf_name:
continue
file_id = pdf_name.split(".")[0]
@ -72,7 +97,9 @@ def main(args):
declare_queue(channel, CONFIG.rabbitmq.queues.input)
declare_queue(channel, CONFIG.rabbitmq.queues.output)
for body in build_message_bodies(args.analysis_container, args.bucket_name):
bucket_name = args.bucket_name or parse_disjunction_string(CONFIG.storage.bucket)
for body in build_message_bodies(args.analysis_container, bucket_name):
channel.basic_publish("", CONFIG.rabbitmq.queues.input, body)
print(f"Put {body} on {CONFIG.rabbitmq.queues.input}")

View File

@ -0,0 +1,37 @@
import argparse
import gzip
import json
from funcy import lmap
from pyinfra.server.packing import string_to_bytes
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("compressed_json_path", help="Path to compressed JSON file")
return parser.parse_args()
def interpret(parsed):
try:
return {**parsed, "data": str(string_to_bytes(parsed["data"]))}
except KeyError:
return parsed
def main(fp):
with open(fp, "rb") as f:
compressed_json_path = f.read()
json_str = gzip.decompress(compressed_json_path)
parsed = json.loads(json_str)
parsed = [parsed] if isinstance(parsed, dict) else parsed
parsed = lmap(interpret, parsed)
print(json.dumps(parsed, indent=2))
if __name__ == "__main__":
args = parse_args()
main(args.compressed_json_path)

View File

@ -1,42 +1,25 @@
import logging
from multiprocessing import Process
import requests
from retry import retry
from pyinfra.utils.retry import retry
from pyinfra.config import CONFIG
from pyinfra.exceptions import AnalysisFailure, ConsumerError
from pyinfra.component_factory import ComponentFactory
from pyinfra.exceptions import ConsumerError
from pyinfra.flask import run_probing_webserver, set_up_probing_webserver
from pyinfra.queue.consumer import Consumer
from pyinfra.queue.queue_manager.pika_queue_manager import PikaQueueManager
from pyinfra.storage.storages import get_storage
from pyinfra.utils.banner import show_banner
from pyinfra.visitor import QueueVisitor, StorageStrategy
logger = logging.getLogger()
def make_callback(analysis_endpoint):
def callback(message):
def perform_operation(operation):
endpoint = f"{analysis_endpoint}/{operation}"
try:
logging.debug(f"Requesting analysis from {endpoint}...")
analysis_response = requests.post(endpoint, data=message["data"])
analysis_response.raise_for_status()
analysis_response = analysis_response.json()
logging.debug(f"Received response.")
return analysis_response
except Exception as err:
logging.warning(f"Exception caught when calling analysis endpoint {endpoint}.")
raise AnalysisFailure() from err
operations = message.get("operations", ["/"])
results = map(perform_operation, operations)
result = dict(zip(operations, results))
if list(result.keys()) == ["/"]:
result = list(result.values())[0]
return result
return callback
@retry(ConsumerError)
def consume():
try:
consumer = ComponentFactory(CONFIG).get_consumer()
consumer.basic_consume_and_publish()
except Exception as err:
logger.exception(err)
raise ConsumerError from err
def main():
@ -46,20 +29,6 @@ def main():
logging.info("Starting webserver...")
webserver.start()
callback = make_callback(CONFIG.rabbitmq.callback.analysis_endpoint)
storage = get_storage(CONFIG.storage.backend)
response_strategy = StorageStrategy(storage)
visitor = QueueVisitor(storage, callback, response_strategy)
@retry(ConsumerError, tries=3, delay=5, jitter=(1, 3))
def consume():
try: # RED-4049 queue manager needs to be in try scope to eventually throw Exception after connection loss.
queue_manager = PikaQueueManager(CONFIG.rabbitmq.queues.input, CONFIG.rabbitmq.queues.output)
consumer = Consumer(visitor, queue_manager)
consumer.basic_consume_and_publish()
except Exception as err:
raise ConsumerError from err
try:
consume()
except KeyboardInterrupt:

View File

@ -1,3 +1,55 @@
service:
response_formatter: identity
operations:
upper:
input:
subdir: ""
extension: up_in.gz
multi: False
output:
subdir: ""
extension: up_out.gz
extract:
input:
subdir: ""
extension: extr_in.gz
multi: False
output:
subdir: "extractions"
extension: gz
rotate:
input:
subdir: ""
extension: rot_in.gz
multi: False
output:
subdir: ""
extension: rot_out.gz
classify:
input:
subdir: ""
extension: cls_in.gz
multi: True
output:
subdir: ""
extension: cls_out.gz
stream_pages:
input:
subdir: ""
extension: pgs_in.gz
multi: False
output:
subdir: "pages"
extension: pgs_out.gz
default:
input:
subdir: ""
extension: IN.gz
multi: False
output:
subdir: ""
extension: OUT.gz
storage:
minio:
endpoint: "http://127.0.0.1:9000"
@ -21,5 +73,7 @@ webserver:
port: $SERVER_PORT|5000 # webserver port
mode: $SERVER_MODE|production # webserver mode: {development, production}
mock_analysis_endpoint: "http://127.0.0.1:5000"
mock_analysis_endpoint: "http://127.0.0.1:5000"
use_docker_fixture: 1
logging: 0

View File

@ -1,11 +1,20 @@
import json
from pyinfra.config import CONFIG as MAIN_CONFIG
MAIN_CONFIG["retry"]["delay"] = 0.1
MAIN_CONFIG["retry"]["jitter"] = (0.1, 0.2)
from pyinfra.default_objects import get_component_factory
from test.config import CONFIG as TEST_CONFIG
import logging
import time
from unittest.mock import Mock
import pika
import pytest
import testcontainers.compose
from testcontainers.compose import DockerCompose
from pyinfra.exceptions import UnknownClient
from pyinfra.locations import TEST_DIR, COMPOSE_PATH
@ -16,14 +25,36 @@ from pyinfra.storage.adapters.s3 import S3StorageAdapter
from pyinfra.storage.clients.azure import get_azure_client
from pyinfra.storage.clients.s3 import get_s3_client
from pyinfra.storage.storage import Storage
from test.config import CONFIG
from pyinfra.visitor import QueueVisitor
from pyinfra.visitor.strategies.response.forwarding import ForwardingStrategy
from pyinfra.visitor.strategies.response.storage import StorageStrategy
from test.queue.queue_manager_mock import QueueManagerMock
from test.storage.adapter_mock import StorageAdapterMock
from test.storage.client_mock import StorageClientMock
from pyinfra.visitor import StorageStrategy, ForwardingStrategy, QueueVisitor
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logging.basicConfig()
logger = logging.getLogger()
# TODO: refactor all fixtures into cleanly separated modules
pytest_plugins = [
"test.fixtures.consumer",
"test.fixtures.input",
"test.fixtures.pdf",
"test.fixtures.server",
"test.integration_tests.serve_test",
]
logging.getLogger("PIL.PngImagePlugin").setLevel(level=logging.CRITICAL + 1)
logging.getLogger("waitress").setLevel(level=logging.CRITICAL + 1)
@pytest.fixture(autouse=True)
def mute_logger():
if not TEST_CONFIG.logging:
logger.setLevel(logging.CRITICAL + 1)
@pytest.fixture(scope="session")
@ -58,10 +89,14 @@ def mock_make_load_data():
return load_data
@pytest.fixture(params=["minio", "aws"], scope="session")
def storage(client_name, bucket_name, request):
# def pytest_make_parametrize_id(config, val, argname):
# return f"\n\t{argname}={val}\n"
@pytest.fixture
def storage(client_name, bucket_name, s3_backend, docker_compose):
logger.debug("Setup for storage")
storage = Storage(get_adapter(client_name, request.param))
storage = Storage(get_adapter(client_name, s3_backend))
storage.make_bucket(bucket_name)
storage.clear_bucket(bucket_name)
yield storage
@ -69,15 +104,23 @@ def storage(client_name, bucket_name, request):
storage.clear_bucket(bucket_name)
@pytest.fixture(params=["minio", "aws"])
def s3_backend(request):
return request.param
@pytest.fixture(scope="session", autouse=True)
def docker_compose(sleep_seconds=30):
logger.info(f"Starting docker containers with {COMPOSE_PATH}/docker-compose.yml...")
compose = testcontainers.compose.DockerCompose(COMPOSE_PATH, compose_file_name="docker-compose.yml")
compose.start()
logger.info(f"Sleeping for {sleep_seconds} seconds to wait for containers to finish startup... ")
time.sleep(sleep_seconds)
yield compose
compose.stop()
if TEST_CONFIG.use_docker_fixture:
logger.info(f"Starting docker containers with {COMPOSE_PATH}/docker-compose.yml...")
compose = DockerCompose(COMPOSE_PATH, compose_file_name="docker-compose.yml")
compose.start()
logger.info(f"Sleeping for {sleep_seconds} seconds to wait for containers to finish startup... ")
time.sleep(sleep_seconds)
yield compose
compose.stop()
else:
yield None
def get_pika_connection_params():
@@ -86,7 +129,7 @@ def get_pika_connection_params():
def get_s3_params(s3_backend):
    params = CONFIG.storage[s3_backend]
    params = TEST_CONFIG.storage[s3_backend]
    return params
@@ -95,7 +138,7 @@ def get_adapter(client_name, s3_backend):
    if client_name == "mock":
        return StorageAdapterMock(StorageClientMock())
    if client_name == "azure":
        return AzureStorageAdapter(get_azure_client(CONFIG.storage.azure.connection_string))
        return AzureStorageAdapter(get_azure_client(TEST_CONFIG.storage.azure.connection_string))
    if client_name == "s3":
        return S3StorageAdapter(get_s3_client(get_s3_params(s3_backend)))
    else:
@@ -110,7 +153,7 @@ def get_queue_manager(queue_manager_name) -> QueueManager:
@pytest.fixture(scope="session")
def queue_manager(queue_manager_name):
def queue_manager(queue_manager_name, docker_compose):
    def close_connections():
        if queue_manager_name == "pika":
            try:
@@ -134,7 +177,7 @@ def queue_manager(queue_manager_name):
@pytest.fixture(scope="session")
def callback():
    def inner(request):
        return request["data"].decode() * 2
        return [request["data"].decode() * 2]
    return inner
@@ -156,5 +199,22 @@ def response_strategy(response_strategy_name, storage):
@pytest.fixture()
def visitor(storage, analysis_callback, response_strategy):
    return QueueVisitor(storage, analysis_callback, response_strategy)
def visitor(storage, analysis_callback, response_strategy, component_factory):
    return QueueVisitor(
        callback=analysis_callback,
        data_loader=component_factory.get_downloader(storage),
        response_strategy=response_strategy,
    )
@pytest.fixture
def file_descriptor_manager(component_factory):
    return component_factory.get_file_descriptor_manager()
@pytest.fixture
def component_factory():
    MAIN_CONFIG["service"]["operations"] = TEST_CONFIG.service.operations
    return get_component_factory(MAIN_CONFIG)
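
A note on the callback fixture change above (plain value to single-element list): one hedged reading is that analysis callbacks now return a list of results per request, so that one-to-many operations can emit several items for a single message. A minimal callback under that assumption, with the request shape mirrored from the test fixtures:

# Illustrative only; the list-returning contract is inferred from the diff,
# not from documented pyinfra API.
def echo_callback(request):
    return [request["data"].decode() * 2]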


@@ -0,0 +1,12 @@
# import pytest
# from funcy import lmap
#
# from pyinfra.server.rest import process_lazily
# from pyinfra.server.utils import unpack
#
#
# @pytest.mark.parametrize("batched", [True, False])
# @pytest.mark.parametrize("item_type", ["string", "image", "pdf"])
# def test_pickup_endpoint(url, input_data_items, metadata, operation, targets, server_process):
#     output = lmap(unpack, process_lazily(f"{url}/submit", input_data_items, metadata))
#     assert output == targets


@@ -0,0 +1,32 @@
from functools import partial
from funcy import chunks, first, compose
def test_repeated_first_chunk_consumption():
    def f(chunk):
        return sum(chunk)

    def g():
        return f(first(chunks(3, items)))

    items = iter(range(10))
    assert g() == 3
    assert g() == 12
    assert g() == 21
    assert g() == 9


def test_repeated_first_chunk_consumption_passing():
    def f(chunk):
        return sum(chunk)

    g = compose(f, first, partial(chunks, 3))
    items = iter(range(10))
    assert g(items) == 3
    assert g(items) == 12
    assert g(items) == 21
    assert g(items) == 9
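
For context on the two tests above: funcy.chunks consumes an iterator lazily, so each call to g advances the shared iterator by one chunk, while a plain sequence is sliced and left untouched. A standalone illustration, assuming funcy's standard behavior:

from funcy import chunks, first

it = iter(range(10))
assert sum(first(chunks(3, it))) == 3    # consumes 0, 1, 2
assert sum(first(chunks(3, it))) == 12   # consumes 3, 4, 5

data = list(range(10))
assert sum(first(chunks(3, data))) == 3  # a list is sliced, not consumed
assert sum(first(chunks(3, data))) == 3  # so the first chunk never changes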

test/fixtures/__init__.py (new, empty file)

test/fixtures/consumer.py (new file, 37 lines)

@@ -0,0 +1,37 @@
from operator import itemgetter
from typing import Iterable
import pytest
from pyinfra.queue.consumer import Consumer
@pytest.fixture(scope="session")
def consumer(queue_manager, callback):
return Consumer(callback, queue_manager)
@pytest.fixture(scope="session")
def access_callback():
return itemgetter("fileId")
@pytest.fixture()
def items():
numbers = [f"{i}".encode() for i in range(3)]
return pair_data_with_queue_message(numbers)
def pair_data_with_queue_message(data: Iterable[bytes]):
def inner():
for i, d in enumerate(data):
body = {
"dossierId": "dossier_id",
"fileId": f"file_id_{i}",
"targetFileExtension": "in.gz",
"responseFileExtension": "out.gz",
"pages": [0, 2, 3],
}
yield d, body
return list(inner())
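
For orientation, the items fixture pairs each payload with a queue-message body; a minimal check of the first pair, derived directly from the code above:

data, body = pair_data_with_queue_message([b"0"])[0]
assert data == b"0"
assert body["fileId"] == "file_id_0"
assert body["pages"] == [0, 2, 3]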

test/fixtures/input.py (new file, 165 lines)

@@ -0,0 +1,165 @@
from functools import partial
from itertools import starmap, repeat
import numpy as np
import pytest
from PIL import Image
from funcy import lmap, compose, flatten, lflatten, omit, second, first, lzip, merge
from pyinfra.server.normalization import normalize_item
from pyinfra.server.nothing import Nothing
from pyinfra.server.packing import pack, unpack
from pyinfra.utils.func import star, lift, lstarlift
from test.utils.image import image_to_bytes
from test.utils.pdf import pdf_stream
@pytest.fixture
def input_data_items(unencoded_input_data, input_data_encoder):
    return input_data_encoder(unencoded_input_data)


@pytest.fixture
def unencoded_input_data(item_type, unencoded_strings, unencoded_images, unencoded_pdfs):
    if item_type == "string":
        return unencoded_strings
    elif item_type == "image":
        return unencoded_images
    elif item_type == "pdf":
        return unencoded_pdfs
    else:
        raise ValueError(f"Unknown item type {item_type}")


@pytest.fixture
def input_data_encoder(item_type):
    if item_type == "string":
        return strings_to_bytes
    elif item_type == "image":
        return images_to_bytes
    elif item_type == "pdf":
        return pdfs_to_bytes
    else:
        raise ValueError(f"Unknown item type {item_type}")
@pytest.fixture
def unencoded_pdfs(n_items, unencoded_pdf):
    return [unencoded_pdf] * n_items


def pdfs_to_bytes(unencoded_pdfs):
    return [pdf_stream(pdf) for pdf in unencoded_pdfs]


@pytest.fixture
def target_data_items(input_data_items, core_operation, metadata):
    if core_operation is Nothing:
        return Nothing
    op = compose(normalize_item, core_operation)
    expected = lflatten(starmap(op, zip(input_data_items, metadata)))
    return expected


@pytest.fixture
def unencoded_strings(n_items):
    return [f"content{i}" for i in range(n_items)]


def strings_to_bytes(strings):
    return [bytes(s, encoding="utf8") for s in strings]
@pytest.fixture
def targets(data_message_pairs, input_data_items, operation, metadata, server_side_test, queue_message_metadata):
    """TODO: this has become super wonky"""
    metadata = [{**m1, **m2} for m1, m2 in zip(lmap(second, data_message_pairs), metadata)]
    if operation is Nothing:
        return Nothing
    op = compose(lift(star(pack)), normalize_item, operation)
    try:
        response_data, response_metadata = zip(*map(unpack, flatten(starmap(op, zip(input_data_items, metadata)))))
        queue_message_keys = ["id"] * (not server_side_test) + [*first(queue_message_metadata).keys()]
        response_metadata = lmap(partial(omit, keys=queue_message_keys), response_metadata)
        expected = lzip(response_data, response_metadata)
    except ValueError:
        expected = []
    return expected
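
The try/except ValueError above covers the parametrized n_items == 0 case: with no input items the starmap is empty, and unpacking an empty zip into two names raises ValueError. A minimal reproduction of that corner case:

pairs = []
try:
    response_data, response_metadata = zip(*pairs)
except ValueError:
    expected = []  # the fixture falls back to an empty expectation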
@pytest.fixture
def endpoint(url, operation_name):
    return f"{url}/{operation_name}"


@pytest.fixture(params=["rest", "basic"])
def client_pipeline_type(request):
    return request.param


@pytest.fixture(params=[1, 0, 5])
def n_items(request):
    return request.param


@pytest.fixture(params=[0, 100])
def n_pages(request):
    return request.param


@pytest.fixture(params=[1, 5])
def buffer_size(request):
    return request.param


def array_to_image(array) -> Image.Image:
    return Image.fromarray(np.uint8(array * 255), mode="RGB")


def input_batch(n_items):
    return np.random.random_sample(size=(n_items, 3, 30, 30))


@pytest.fixture
def unencoded_images(n_items):
    return lmap(array_to_image, input_batch(n_items))


def images_to_bytes(images):
    return lmap(image_to_bytes, images)


@pytest.fixture
def metadata(n_items, many_to_n):
    """storage metadata
    TODO: rename
    """
    return list(repeat({"key": "value"}, times=n_items))
@pytest.fixture
def queue_message_metadata(n_items, operation_name):
    def metadata(i):
        return merge(
            {
                "dossierId": "dossier_id",
                "fileId": f"file_id_{i}",
            },
            ({"operation": operation_name} if operation_name else {}),
            ({"pages": [0, 2, 3]} if n_items > 1 else {}),
        )

    return lmap(metadata, range(n_items))


@pytest.fixture
def packages(input_data_items, metadata):
    return lstarlift(pack)(zip(input_data_items, metadata))

test/fixtures/pdf.py (new file, 11 lines)

@@ -0,0 +1,11 @@
import fpdf
import pytest
@pytest.fixture
def unencoded_pdf(n_pages):
pdf = fpdf.FPDF(unit="pt")
for _ in range(n_pages):
pdf.add_page()
return pdf
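
The pdf_stream helper imported from test.utils.pdf in test/fixtures/input.py is not shown in this diff; a hedged guess at what such a helper might look like, where the signature and the fpdf version handling are assumptions:

import fpdf

def pdf_stream(pdf: fpdf.FPDF) -> bytes:
    out = pdf.output(dest="S")  # str in fpdf 1.x, bytes-like in fpdf2
    return out.encode("latin-1") if isinstance(out, str) else bytes(out)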

Some files were not shown because too many files have changed in this diff.