Merge in RR/pyinfra from RED-6366-refactor to master
Squashed commit of the following:
commit 8807cda514b5cc24b1be208173283275d87dcb97
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Mar 10 13:15:15 2023 +0100
enable docker-compose autouse for automatic tests
commit c4579581d3e9a885ef387ee97f3f3a5cf4731193
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Mar 10 12:35:49 2023 +0100
black
commit ac2b754c5624ef37ce310fce7196c9ea11bbca03
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Mar 10 12:30:23 2023 +0100
refactor storage url parsing
- move parsing and validation to config where the connection url is
actually read in
- improve readability of parsing fn
commit 371802cc10b6d946c4939ff6839571002a2cb9f4
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Mar 10 10:48:00 2023 +0100
refactor
commit e8c381c29deebf663e665920752c2965d7abce16
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Mar 10 09:57:34 2023 +0100
rename
commit c8628a509316a651960dfa806d5fe6aacb7a91c1
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Mar 10 09:37:01 2023 +0100
renaming and refactoring
commit 4974d4f56fd73bc55bd76aa7a9bbb16babee19f4
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Mar 10 08:53:09 2023 +0100
refactor payload processor
- limit make_uploader and make_downloader cache
- partially apply them when the class is initialized with storage and
bucket to make the logic and behaviour more comprehensive
- renaming functional pipeline steps to be more expressive
commit f8d51bfcad2b815c8293ab27dd66b256255c5414
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Mar 9 15:30:32 2023 +0100
remove monitor and rename Payload
commit 412ddaa207a08aff1229d7acd5d95402ac8cd578
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Mar 2 10:15:39 2023 +0100
remove azure connection string and disable respective test for now for security reasons
commit 7922a2d9d325f3b9008ad4e3e56b241ba179f52c
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Mar 1 13:30:58 2023 +0100
make payload formatting function names more expressive
commit 7517e544b0f5a434579cc9bada3a37e7ac04059f
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Mar 1 13:24:57 2023 +0100
add some type hints
commit 095410d3009f2dcbd374680dd0f7b55de94c9e76
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Wed Mar 1 10:54:58 2023 +0100
Refactoring
- Renaming
- Docstring adjustments
commit e992f0715fc2636eb13eb5ffc4de0bcc5d433fc8
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Wed Mar 1 09:43:26 2023 +0100
Re-wording and typo fixes
commit 3c2d698f9bf980bc4b378a44dc20c2badc407b3e
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Feb 28 14:59:59 2023 +0100
enable auto startup for docker compose in tests
commit 55773b4fb0b624ca4745e5b8aeafa6f6a0ae6436
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Feb 28 14:59:37 2023 +0100
Extended tests for queue manager
commit 14f7f943f60b9bfb9fe77fa3cef99a1e7d094333
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Feb 28 13:39:00 2023 +0100
enable auto startup for docker compose in tests
commit 7caf354491c84c6e0b0e09ad4d41cb5dfbfdb225
Merge: 49d47ba d0277b8
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Feb 28 13:32:52 2023 +0100
Merge branch 'RED-6205-prometheus' of ssh://git.iqser.com:2222/rr/pyinfra into RED-6205-prometheus
commit 49d47baba8ccf11dee48a4c1cbddc3bbd12471e5
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Feb 28 13:32:42 2023 +0100
adjust Payload Processor signature
commit d0277b86bc54994b6032774bf0ec2d7b19d7f517
Merge: 5184a18 f6b35d6
Author: Christoph Schabert <christoph.schabert@iqser.com>
Date: Tue Feb 28 11:07:16 2023 +0100
Pull request #61: Change Sec Trigger to PR
Merge in RR/pyinfra from cschabert/PlanSpecjava-1677578703647 to RED-6205-prometheus
* commit 'f6b35d648c88ddbce1856445c3b887bce669265c':
Change Sec Trigger to PR
commit f6b35d648c88ddbce1856445c3b887bce669265c
Author: Christoph Schabert <christoph.schabert@iqser.com>
Date: Tue Feb 28 11:05:13 2023 +0100
Change Sec Trigger to PR
... and 20 more commits
81 lines
3.3 KiB
Python
81 lines
3.3 KiB
Python
import logging
|
|
from dataclasses import asdict
|
|
from functools import partial
|
|
from typing import Callable, Union, List
|
|
|
|
from funcy import compose
|
|
|
|
from pyinfra.config import get_config, Config
|
|
from pyinfra.payload_processing.payload import (
|
|
read_queue_message_payload,
|
|
format_service_processing_result_for_storage,
|
|
format_to_queue_message_response_body,
|
|
)
|
|
from pyinfra.storage import get_storage
|
|
from pyinfra.storage.storage import make_downloader, make_uploader
|
|
from pyinfra.storage.storages.interface import Storage
|
|
|
|
# getLogger() without a name returns the root logger, so the level read from the config
# (logging_level_root) is applied to the root logger, not to a module-specific one.
logger = logging.getLogger()
logger.setLevel(level=get_config().logging_level_root)
|
|
|
|
|
|
class PayloadProcessor:
    """Callable wrapping a service's analysis function (e.g. NER service) in pre- and post-processing steps.

    An instance is bound to one storage and one bucket. Calling it with a queue message payload downloads the
    referenced file, runs the analysis function on it, uploads the formatted result and returns the body of the
    response queue message.
    """

    def __init__(self, storage: Storage, bucket: str, data_processor: Callable):
        """Binds the processor to a storage, a bucket and an analysis function.

        Args:
            storage: The storage to use for downloading and uploading files
            bucket: The bucket to use for downloading and uploading files
            data_processor: The analysis function to be called with the downloaded file
        """
        # Storage and bucket are fixed for the lifetime of the instance. File and compression types are only
        # known per message, so the downloader/uploader factories are just partially applied here.
        self.partial_download_fn = partial(make_downloader, storage, bucket)
        self.partial_upload_fn = partial(make_uploader, storage, bucket)

        self.process_data = data_processor

    def __call__(self, queue_message_payload: dict) -> dict:
        """Processes a queue message payload.

        In order, this will:
            1. Download the file specified in the message payload from the storage
            2. Process the file with the analysis function
            3. Upload the result to the storage
            4. Return the payload for a response queue message

        Args:
            queue_message_payload: The payload of a queue message. The payload is expected to be a dict with the
                following keys: dossierId, fileId, targetFileExtension, responseFileExtension

        Returns:
            The payload for a response queue message. The payload is a dict with the following keys: dossierId, fileId
        """
        return self._process(queue_message_payload)

    def _process(self, queue_message_payload: dict) -> dict:
        """Runs download -> analysis -> formatting -> upload for one message and builds the response body."""
        request = read_queue_message_payload(queue_message_payload)
        logger.info(f"Processing {asdict(request)} ...")

        # Finish configuring the transfer functions with the per-message file and compression types.
        fetch_target_file = self.partial_download_fn(request.target_file_type, request.target_compression_type)
        store_result = self.partial_upload_fn(request.response_file_type, request.response_compression_type)

        # compose applies its arguments right to left: download first, then analyse, then format for storage.
        run_pipeline = compose(
            partial(format_service_processing_result_for_storage, request),
            self.process_data,
            fetch_target_file,
        )
        formatted_result: List[dict] = run_pipeline(request.target_file_name)

        store_result(request.response_file_name, formatted_result)

        return format_to_queue_message_response_body(request)
|
|
|
|
|
|
def make_payload_processor(data_processor: Callable, config: Union[None, Config] = None) -> PayloadProcessor:
    """Produces payload processor for queue manager.

    Args:
        data_processor: The analysis function of the service. Its return value is passed to ``list`` before
            further processing, so it has to be iterable.
        config: The configuration to take the storage and the storage bucket from. If omitted (or falsy), the
            configuration returned by ``get_config()`` is used.

    Returns:
        A PayloadProcessor bound to the configured storage and bucket.
    """
    config = config or get_config()

    storage: Storage = get_storage(config)
    bucket: str = config.storage_bucket

    # Materialize the processor's output as a list before it is handed on to formatting and upload.
    data_processor = compose(list, data_processor)

    return PayloadProcessor(storage, bucket, data_processor)
|