# pyinfra/pyinfra/callback.py
import logging
from funcy import merge, omit, lmap
from pyinfra.exceptions import AnalysisFailure
logger = logging.getLogger(__name__)


class Callback:
    """Callback applied to items pulled from the storage.

    Each item (``body``) is forwarded to an analysis endpoint derived from the
    item's ``operation`` field. One pipeline per endpoint is built lazily via
    ``pipeline_factory`` and cached for reuse.
    """

    def __init__(self, base_url, pipeline_factory):
        """
        :param base_url: base URL of the analysis service; the operation name
            is appended to it to form the full endpoint URL.
        :param pipeline_factory: callable taking an endpoint URL and returning
            a pipeline: ``pipeline(data_batch, metadata_batch) -> responses``.
        """
        self.base_url = base_url
        self.pipeline_factory = pipeline_factory
        # Cache: endpoint URL -> pipeline, so each pipeline is built at most once.
        self.endpoint2pipeline = {}

    def __make_endpoint(self, operation):
        """Return the full endpoint URL for *operation*."""
        return f"{self.base_url}/{operation}"

    def __get_pipeline(self, endpoint):
        """Return the cached pipeline for *endpoint*, creating it on first use."""
        # Single containment check instead of a lookup followed by an index.
        if endpoint not in self.endpoint2pipeline:
            self.endpoint2pipeline[endpoint] = self.pipeline_factory(endpoint)
        return self.endpoint2pipeline[endpoint]

    @staticmethod
    def __run_pipeline(pipeline, body):
        """Run *pipeline* on a single storage item.

        *body* carries ``data``, ``metadata``, and any number of additional
        queue-message metadata keys. The queue keys are merged into the item
        metadata for the pipeline call (queue keys win on collision) and are
        stripped from the pipeline responses again before returning them.

        :returns: list of response dicts with queue-message metadata removed.
        :raises AnalysisFailure: if the pipeline raises any exception.

        TODO: Since data and metadata are passed as singletons, there is no
            buffering and hence no batching happening within the pipeline.
            However, the queue acknowledgment logic needs to be changed in
            order to facilitate passing non-singletons, to only ack a message
            once a response is pulled from the output queue of the pipeline.
            Probably the pipeline return value needs to contain the queue
            message frame (or so), in order for the queue manager to tell
            which message to ack.
        TODO: materializing the response stream into a list is a temporary
            solution, while the client pipeline operates on singletons
            ([data], [metadata]).
        """
        def queue_message_keys(body):
            # Every top-level key that is not part of the storage item itself.
            return set(body) - {"data", "metadata"}

        def combine_storage_item_metadata_with_queue_message_metadata(body):
            queue_metadata = {k: v for k, v in body.items()
                              if k not in ("data", "metadata")}
            # Queue-message keys override item metadata on collision
            # (later-wins, matching the former funcy.merge semantics).
            return {**body["metadata"], **queue_metadata}

        def remove_queue_message_metadata(result):
            # NOTE: keys are derived from the *request* body (closure), not
            # from the result — the queue keys travel through the pipeline.
            keys = queue_message_keys(body)
            metadata = {k: v for k, v in result["metadata"].items()
                        if k not in keys}
            return {**result, "metadata": metadata}

        try:
            data = body["data"]
            metadata = combine_storage_item_metadata_with_queue_message_metadata(body)
            analysis_response_stream = pipeline([data], [metadata])
            return [remove_queue_message_metadata(response)
                    for response in analysis_response_stream]
        except Exception as err:
            # logger.exception records the traceback, not just the message.
            logger.exception(err)
            raise AnalysisFailure from err

    def __call__(self, body: dict):
        """Analyze one storage item and return the pipeline responses.

        On ``AnalysisFailure`` the error is logged and ``None`` is returned,
        so a single failing item does not crash the consuming loop
        (deliberate best-effort behavior).
        """
        operation = body.get("operation", "submit")
        endpoint = self.__make_endpoint(operation)
        pipeline = self.__get_pipeline(endpoint)
        try:
            # Module logger (not the root logger) with lazy %-formatting.
            logger.debug("Requesting analysis from %s...", endpoint)
            return self.__run_pipeline(pipeline, body)
        except AnalysisFailure:
            logger.warning("Exception caught when calling analysis endpoint %s.", endpoint)
            return None