Compare commits
315 Commits
106 .dockerignore
@@ -1,106 +0,0 @@
data
/build_venv/
/.venv/
/misc/
/incl/image_service/test/
/scratch/
/bamboo-specs/
README.md
Dockerfile
*idea
*misc
*egg-innfo
*pycache*

# Git
.git
.gitignore

# CI
.codeclimate.yml
.travis.yml
.taskcluster.yml

# Docker
.docker

# Byte-compiled / optimized / DLL files
__pycache__/
*/__pycache__/
*/*/__pycache__/
*/*/*/__pycache__/
*.py[cod]
*/*.py[cod]
*/*/*.py[cod]
*/*/*/*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/**
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Virtual environment
.env/
.venv/
#venv/

# PyCharm
.idea

# Python mode for VIM
.ropeproject
*/.ropeproject
*/*/.ropeproject
*/*/*/.ropeproject

# Vim swap files
*.swp
*/*.swp
*/*/*.swp
*/*/*/*.swp
2 .dvc/.gitignore (vendored, new file)
@@ -0,0 +1,2 @@
/config.local
/cache
5 .dvc/config (new file)
@@ -0,0 +1,5 @@
[core]
remote = azure
['remote "azure"']
url = azure://pyinfra-dvc
connection_string =
3 .dvcignore (new file)
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
57 .gitignore (vendored)
@@ -1,10 +1,53 @@
# Environments
.env
.venv
__pycache__
data/
env/
venv/
.DS_Store

# Project folders
*.vscode/
.idea
*_app
*pytest_cache
*joblib
*tmp
*profiling
*logs
*docker
*drivers
*bamboo-specs/target
.coverage
data
build_venv
reports
pyinfra.egg-info
bamboo-specs/target
.pytest_cache
/.coverage
.idea

# Python specific files
__pycache__/
*.py[cod]
*.ipynb
*.ipynb_checkpoints

# file extensions
*.log
*.csv
*.pkl
*.profile
*.cbm
*.egg-info

# temp files
*.swp
*~
*.un~

# keep files
!notebooks/*.ipynb

# keep folders
!secrets
!data/*
!drivers

# ignore files
bamboo.yml
23 .gitlab-ci.yml (new file)
@@ -0,0 +1,23 @@
# CI for services, check gitlab repo for python package CI
include:
  - project: "Gitlab/gitlab"
    ref: main
    file: "/ci-templates/research/python_pkg-test-build-release.gitlab-ci.yml"

# set project variables here
variables:
  NEXUS_PROJECT_DIR: research # subfolder in Nexus docker-gin where your container will be stored
  IMAGENAME: $CI_PROJECT_NAME # if the project URL is gitlab.example.com/group-name/project-1, CI_PROJECT_NAME is project-1
  REPORTS_DIR: reports
  FF_USE_FASTZIP: "true" # enable fastzip - a faster zip implementation that also supports level configuration.
  ARTIFACT_COMPRESSION_LEVEL: default # can also be set to fastest, fast, slow and slowest. If just enabling fastzip is not enough try setting this to fastest or fast.
  CACHE_COMPRESSION_LEVEL: default # same as above, but for caches
  # TRANSFER_METER_FREQUENCY: 5s # will display transfer progress every 5 seconds for artifacts and remote caches. For debugging purposes.


############
# UNIT TESTS
unit-tests:
  variables:
    ###### UPDATE/EDIT ######
    UNIT_TEST_DIR: "tests/unit_test"
55 .pre-commit-config.yaml (new file)
@@ -0,0 +1,55 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
exclude: ^(docs/|notebooks/|data/|src/configs/|tests/|.hooks/)
default_language_version:
  python: python3.10
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        name: Check Gitlab CI (unsafe)
        args: [--unsafe]
        files: .gitlab-ci.yml
      - id: check-yaml
        exclude: .gitlab-ci.yml
      - id: check-toml
      - id: detect-private-key
      - id: check-added-large-files
        args: ['--maxkb=10000']
      - id: check-case-conflict
      - id: mixed-line-ending

  - repo: https://github.com/pre-commit/mirrors-pylint
    rev: v3.0.0a5
    hooks:
      - id: pylint
        language: system
        args:
          - --disable=C0111,R0903
          - --max-line-length=120

  - repo: https://github.com/pre-commit/mirrors-isort
    rev: v5.10.1
    hooks:
      - id: isort
        args:
          - --profile black

  - repo: https://github.com/psf/black
    rev: 24.10.0
    hooks:
      - id: black
        # exclude: ^(docs/|notebooks/|data/|src/secrets/)
        args:
          - --line-length=120

  - repo: https://github.com/compilerla/conventional-pre-commit
    rev: v3.6.0
    hooks:
      - id: conventional-pre-commit
        pass_filenames: false
        stages: [commit-msg]
        # args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test]
1 .python-version (new file)
@@ -0,0 +1 @@
3.10
19 Dockerfile
@@ -1,19 +0,0 @@
FROM python:3.8

# Use a virtual environment.
RUN python -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"

# Upgrade pip.
RUN python -m pip install --upgrade pip

# Make a directory for the service files and copy the service repo into the container.
WORKDIR /app/service
COPY . .

# Install module & dependencies
RUN python3 -m pip install -e .
RUN python3 -m pip install -r requirements.txt

# Run the service loop.
CMD ["python", "src/serve.py"]
@@ -1,19 +0,0 @@
ARG BASE_ROOT="nexus.iqser.com:5001/red/"
ARG VERSION_TAG="dev"

FROM ${BASE_ROOT}pyinfra:${VERSION_TAG}

EXPOSE 5000
EXPOSE 8080

RUN python3 -m pip install coverage

# Make a directory for the service files and copy the service repo into the container.
WORKDIR /app/service
COPY . .

# Install module & dependencies
RUN python3 -m pip install -e .
RUN python3 -m pip install -r requirements.txt

CMD coverage run -m pytest test/ -x && coverage report -m && coverage xml
85 Makefile (new file)
@@ -0,0 +1,85 @@
.PHONY: \
	poetry in-project-venv dev-env use-env install install-dev tests \
	update-version sync-version-with-git \
	docker docker-build-run docker-build docker-run \
	docker-rm docker-rm-container docker-rm-image \
	pre-commit get-licenses prep-commit \
	docs sphinx_html sphinx_apidoc
.DEFAULT_GOAL := run

export DOCKER=docker
export DOCKERFILE=Dockerfile
export IMAGE_NAME=rule_engine-image
export CONTAINER_NAME=rule_engine-container
export HOST_PORT=9999
export CONTAINER_PORT=9999
export PYTHON_VERSION=python3.8

# all commands should be executed in the root dir or the project,
# specific environments should be deactivated

poetry: in-project-venv use-env dev-env

in-project-venv:
	poetry config virtualenvs.in-project true

use-env:
	poetry env use ${PYTHON_VERSION}

dev-env:
	poetry install --with dev

install:
	poetry add $(pkg)

install-dev:
	poetry add --dev $(pkg)

requirements:
	poetry export --without-hashes --output requirements.txt

update-version:
	poetry version prerelease

sync-version-with-git:
	git pull -p && poetry version $(git rev-list --tags --max-count=1 | git describe --tags --abbrev=0)

docker: docker-rm docker-build-run

docker-build-run: docker-build docker-run

docker-build:
	$(DOCKER) build \
		--no-cache --progress=plain \
		-t $(IMAGE_NAME) -f $(DOCKERFILE) .

docker-run:
	$(DOCKER) run -it --rm -p $(HOST_PORT):$(CONTAINER_PORT)/tcp --name $(CONTAINER_NAME) $(IMAGE_NAME) python app.py

docker-rm: docker-rm-container docker-rm-image

docker-rm-container:
	-$(DOCKER) rm $(CONTAINER_NAME)

docker-rm-image:
	-$(DOCKER) image rm $(IMAGE_NAME)

tests:
	poetry run pytest ./tests

prep-commit:
	docs get-license sync-version-with-git update-version pre-commit

pre-commit:
	pre-commit run --all-files

get-licenses:
	pip-licenses --format=json --order=license --with-urls > pkg-licenses.json

docs: sphinx_apidoc sphinx_html

sphinx_html:
	poetry run sphinx-build -b html docs/source/ docs/build/html -E -a

sphinx_apidoc:
	poetry run sphinx-apidoc -o ./docs/source/modules ./src/rule_engine
239 README.md
@@ -1,103 +1,220 @@
# Infrastructure to deploy Research Projects
# PyInfra

The Infrastructure expects to be deployed in the same Pod / local environment as the analysis container and handles all outbound communication.
1. [ About ](#about)
2. [ Configuration ](#configuration)
3. [ Queue Manager ](#queue-manager)
4. [ Module Installation ](#module-installation)
5. [ Scripts ](#scripts)
6. [ Tests ](#tests)
7. [ Opentelemetry protobuf dependency hell ](#opentelemetry-protobuf-dependency-hell)

## About

Shared library for the research team, containing code related to infrastructure and communication with other services.
It offers a simple interface for processing data and sending responses via AMQP, monitoring via Prometheus, and storage
access via S3 or Azure. It also exports traces via OpenTelemetry for queue messages and webserver requests.

To start, see the [complete example](pyinfra/examples.py), which shows how to use all features of the service and can be
imported and used directly for default research service pipelines (data ID in message, download data from storage,
upload result while offering Prometheus monitoring, /health and /ready endpoints and multi-tenancy support).

## Configuration

A configuration is located in `/config.yaml`. All relevant variables can be configured via exporting environment variables.
Configuration is done via `Dynaconf`. This means that you can use environment variables, a `.env` file or `.toml`
file(s) to configure the service. You can also combine these methods. The precedence is
`environment variables > .env > .toml`. It is recommended to load settings with the provided
[`load_settings`](pyinfra/config/loader.py) function, which you can combine with the provided
[`parse_args`](pyinfra/config/loader.py) function. This allows you to load settings from a `.toml` file or a folder with
`.toml` files and override them with environment variables.
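The following minimal sketch illustrates the precedence described above; it is not part of the package's own examples, and the file name `settings.toml` and the attribute-style access are assumptions:

```python
# Hypothetical usage sketch: load settings from a .toml file, then let
# environment variables override individual values.
from pyinfra.config.loader import load_settings

# assumes a local settings.toml (or a folder of .toml files) exists
settings = load_settings("settings.toml")

# exporting LOGGING__LEVEL=DEBUG before start-up would override the
# logging.level value coming from the .toml file (attribute access assumed)
print(settings.logging.level)
```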

| Environment Variable | Default | Description |
|---|---|---|
| LOGGING_LEVEL_ROOT | DEBUG | Logging level for service logger |
| PROBING_WEBSERVER_HOST | "0.0.0.0" | Probe webserver address |
| PROBING_WEBSERVER_PORT | 8080 | Probe webserver port |
| PROBING_WEBSERVER_MODE | production | Webserver mode: {development, production} |
| RABBITMQ_HOST | localhost | RabbitMQ host address |
| RABBITMQ_PORT | 5672 | RabbitMQ host port |
| RABBITMQ_USERNAME | user | RabbitMQ username |
| RABBITMQ_PASSWORD | bitnami | RabbitMQ password |
| RABBITMQ_HEARTBEAT | 7200 | Controls AMQP heartbeat timeout in seconds |
| REQUEST_QUEUE | request_queue | Requests to service |
| RESPONSE_QUEUE | response_queue | Responses by service |
| DEAD_LETTER_QUEUE | dead_letter_queue | Messages that failed to process |
| ANALYSIS_ENDPOINT | "http://127.0.0.1:5000" | Endpoint for analysis container |
| STORAGE_BACKEND | s3 | The type of storage to use {s3, azure} |
| STORAGE_BUCKET | "pyinfra-test-bucket" | The bucket / container to pull files specified in queue requests from |
| STORAGE_ENDPOINT | "http://127.0.0.1:9000" | Endpoint for s3 storage |
| STORAGE_KEY | root | User for s3 storage |
| STORAGE_SECRET | password | Password for s3 storage |
| STORAGE_AZURECONNECTIONSTRING | "DefaultEndpointsProtocol=..." | Connection string for Azure storage |
The following table shows all necessary settings. You can find a preconfigured settings file for this service in
bitbucket. These are the complete settings; you only need all of them if you use all features of the service as described in
the [complete example](pyinfra/examples.py).

## Response Format
| Environment Variable | Internal / .toml Name | Description |
|---|---|---|
| LOGGING\_\_LEVEL | logging.level | Log level |
| DYNAMIC_TENANT_QUEUES\_\_ENABLED | dynamic_tenant_queues.enabled | Enable queues per tenant that are dynamically created mode |
| METRICS\_\_PROMETHEUS\_\_ENABLED | metrics.prometheus.enabled | Enable Prometheus metrics collection |
| METRICS\_\_PROMETHEUS\_\_PREFIX | metrics.prometheus.prefix | Prefix for Prometheus metrics (e.g. {product}-{service}) |
| WEBSERVER\_\_HOST | webserver.host | Host of the webserver (offering e.g. /prometheus, /ready and /health endpoints) |
| WEBSERVER\_\_PORT | webserver.port | Port of the webserver |
| RABBITMQ\_\_HOST | rabbitmq.host | Host of the RabbitMQ server |
| RABBITMQ\_\_PORT | rabbitmq.port | Port of the RabbitMQ server |
| RABBITMQ\_\_USERNAME | rabbitmq.username | Username for the RabbitMQ server |
| RABBITMQ\_\_PASSWORD | rabbitmq.password | Password for the RabbitMQ server |
| RABBITMQ\_\_HEARTBEAT | rabbitmq.heartbeat | Heartbeat for the RabbitMQ server |
| RABBITMQ\_\_CONNECTION_SLEEP | rabbitmq.connection_sleep | Sleep time intervals during message processing. Has to be a divider of heartbeat, and shouldn't be too big, since only in these intervals queue interactions happen (like receiving new messages). This is also the minimum time the service needs to process a message. |
| RABBITMQ\_\_INPUT_QUEUE | rabbitmq.input_queue | Name of the input queue in single queue setting |
| RABBITMQ\_\_OUTPUT_QUEUE | rabbitmq.output_queue | Name of the output queue in single queue setting |
| RABBITMQ\_\_DEAD_LETTER_QUEUE | rabbitmq.dead_letter_queue | Name of the dead letter queue in single queue setting |
| RABBITMQ\_\_TENANT_EVENT_QUEUE_SUFFIX | rabbitmq.tenant_event_queue_suffix | Suffix for the tenant event queue in multi tenant/queue setting |
| RABBITMQ\_\_TENANT_EVENT_DLQ_SUFFIX | rabbitmq.tenant_event_dlq_suffix | Suffix for the dead letter queue in multi tenant/queue setting |
| RABBITMQ\_\_TENANT_EXCHANGE_NAME | rabbitmq.tenant_exchange_name | Name of tenant exchange in multi tenant/queue setting |
| RABBITMQ\_\_QUEUE_EXPIRATION_TIME | rabbitmq.queue_expiration_time | Time until queue expiration in multi tenant/queue setting |
| RABBITMQ\_\_SERVICE_REQUEST_QUEUE_PREFIX | rabbitmq.service_request_queue_prefix | Service request queue prefix in multi tenant/queue setting |
| RABBITMQ\_\_SERVICE_REQUEST_EXCHANGE_NAME | rabbitmq.service_request_exchange_name | Service request exchange name in multi tenant/queue setting |
| RABBITMQ\_\_SERVICE_RESPONSE_EXCHANGE_NAME | rabbitmq.service_response_exchange_name | Service response exchange name in multi tenant/queue setting |
| RABBITMQ\_\_SERVICE_DLQ_NAME | rabbitmq.service_dlq_name | Service dead letter queue name in multi tenant/queue setting |
| STORAGE\_\_BACKEND | storage.backend | Storage backend to use (currently only "s3" and "azure" are supported) |
| STORAGE\_\_S3\_\_BUCKET | storage.s3.bucket | Name of the S3 bucket |
| STORAGE\_\_S3\_\_ENDPOINT | storage.s3.endpoint | Endpoint of the S3 server |
| STORAGE\_\_S3\_\_KEY | storage.s3.key | Access key for the S3 server |
| STORAGE\_\_S3\_\_SECRET | storage.s3.secret | Secret key for the S3 server |
| STORAGE\_\_S3\_\_REGION | storage.s3.region | Region of the S3 server |
| STORAGE\_\_AZURE\_\_CONTAINER | storage.azure.container_name | Name of the Azure container |
| STORAGE\_\_AZURE\_\_CONNECTION_STRING | storage.azure.connection_string | Connection string for the Azure server |
| STORAGE\_\_TENANT_SERVER\_\_PUBLIC_KEY | storage.tenant_server.public_key | Public key of the tenant server |
| STORAGE\_\_TENANT_SERVER\_\_ENDPOINT | storage.tenant_server.endpoint | Endpoint of the tenant server |
| TRACING\_\_ENABLED | tracing.enabled | Enable tracing |
| TRACING\_\_TYPE | tracing.type | Tracing mode - possible values: "opentelemetry", "azure_monitor" (Expects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.) |
| TRACING\_\_OPENTELEMETRY\_\_ENDPOINT | tracing.opentelemetry.endpoint | Endpoint to which OpenTelemetry traces are exported |
| TRACING\_\_OPENTELEMETRY\_\_SERVICE_NAME | tracing.opentelemetry.service_name | Name of the service as displayed in the traces collected |
| TRACING\_\_OPENTELEMETRY\_\_EXPORTER | tracing.opentelemetry.exporter | Name of exporter |
| KUBERNETES\_\_POD_NAME | kubernetes.pod_name | Service pod name |

### Expected AMQP input message:
## Setup
**IMPORTANT** you need to set the following environment variables before running the setup script:
- ``$NEXUS_USER`` your Nexus user (usually equal to firstname.lastname@knecon.com)
- ``$NEXUS_PASSWORD`` your Nexus password (usually equal to your Azure Login)

```shell
# create venv and activate it
source ./scripts/setup/devenvsetup.sh {{ cookiecutter.python_version }} $NEXUS_USER $NEXUS_PASSWORD
source .venv/bin/activate
```

### OpenTelemetry

OpenTelemetry (via its Python SDK) is set up to be as unobtrusive as possible; for typical use cases it can be
configured from environment variables, without additional work in the microservice app, although additional
configuration is possible.

`TRACING__OPENTELEMETRY__ENDPOINT` should typically be set
to `http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces`.
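As an illustration only — the variable names come from the settings table above, the values are placeholders, and the service name is invented for the example — the tracing block can be configured entirely from the environment:

```python
import os

# Placeholder values; the endpoint is the one recommended above.
os.environ["TRACING__ENABLED"] = "true"
os.environ["TRACING__TYPE"] = "opentelemetry"
os.environ["TRACING__OPENTELEMETRY__ENDPOINT"] = (
    "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
)
os.environ["TRACING__OPENTELEMETRY__SERVICE_NAME"] = "my-research-service"  # assumed name
```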

## Queue Manager

The queue manager is responsible for consuming messages from the input queue, processing them and sending the response
to the output queue. The default callback also downloads data from the storage and uploads the result to the storage.
The response message does not contain the data itself, but the identifiers from the input message (including headers
beginning with "X-").

### Standalone Usage

```python
from pyinfra.queue.manager import QueueManager
from pyinfra.queue.callback import make_download_process_upload_callback, DataProcessor
from pyinfra.config.loader import load_settings

settings = load_settings("path/to/settings")
processing_function: DataProcessor  # function should expect a dict (json) or bytes (pdf) as input and should return a json serializable object.

queue_manager = QueueManager(settings)
callback = make_download_process_upload_callback(processing_function, settings)
queue_manager.start_consuming(make_download_process_upload_callback(callback, settings))
```

### Usage in a Service

This is the recommended way to use the module. This includes the webserver, Prometheus metrics and health endpoints.
Custom endpoints can be added by adding a new route to the `app` object beforehand. Settings are loaded from files
specified as CLI arguments (e.g. `--settings-path path/to/settings.toml`). The values can also be set or overridden via
environment variables (e.g. `LOGGING__LEVEL=DEBUG`).

The callback can be replaced with a custom one, for example if the data to process is contained in the message itself
and not on the storage.

```python
from pyinfra.config.loader import load_settings, parse_settings_path
from pyinfra.examples import start_standard_queue_consumer
from pyinfra.queue.callback import make_download_process_upload_callback, DataProcessor

processing_function: DataProcessor

arguments = parse_settings_path()
settings = load_settings(arguments.settings_path)

callback = make_download_process_upload_callback(processing_function, settings)
start_standard_queue_consumer(callback, settings)  # optionally also pass a fastAPI app object with preconfigured routes
```

### AMQP input message:

Either use the legacy format with dossierId and fileId as strings or the new format where absolute paths are used.
All headers beginning with "X-" are forwarded to the message processor, and returned in the response message (e.g.
"X-TENANT-ID" is used to acquire storage information for the tenant).

```json
{
  "dossierId": "",
  "fileId": "",
  "targetFilePath": "",
  "responseFilePath": ""
}
```

Optionally, the input message can contain a field with the key `"operations"`.

### AMQP output message:
or

```json
{
  "dossierId": "",
  "fileId": "",
  ...
  "targetFileExtension": "",
  "responseFileExtension": ""
}
```

## Development
## Module Installation

Either run `src/serve.py` or the built Docker image.
Add the respective version of the pyinfra package to your pyproject.toml file. Make sure to add our gitlab registry as a
source.
For now, all internal packages used by pyinfra also have to be added to the pyproject.toml file (namely kn-utils).
Execute `poetry lock` and `poetry install` to install the packages.

### Setup
You can look up the latest version of the package in
the [gitlab registry](https://gitlab.knecon.com/knecon/research/pyinfra/-/packages).
For the used versions of internal dependencies, please refer to the [pyproject.toml](pyproject.toml) file.

Install module.
```toml
[tool.poetry.dependencies]
pyinfra = { version = "x.x.x", source = "gitlab-research" }
kn-utils = { version = "x.x.x", source = "gitlab-research" }

```bash
pip install -e .
pip install -r requirements.txt
[[tool.poetry.source]]
name = "gitlab-research"
url = "https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi/simple"
priority = "explicit"
```

or build docker image.
## Scripts

### Run pyinfra locally

**Shell 1**: Start minio and rabbitmq containers

```bash
docker build -f Dockerfile -t pyinfra .
$ cd tests && docker compose up
```

### Usage

**Shell 1:** Start a MinIO and a RabbitMQ docker container.
**Shell 2**: Start pyinfra with callback mock

```bash
docker-compose up
$ python scripts/start_pyinfra.py
```

**Shell 2:** Add files to the local minio storage.
**Shell 3**: Upload dummy content on storage and publish message

```bash
python scripts/manage_minio.py add <MinIO target folder> -d path/to/a/folder/with/PDFs
$ python scripts/send_request.py
```

**Shell 2:** Run pyinfra-server.
## Tests

```bash
python src/serve.py
```
or as container:
Tests require a running minio and rabbitmq container, meaning you have to run `docker compose up` in the tests folder
before running the tests.

```bash
docker run --net=host pyinfra
```
## OpenTelemetry Protobuf Dependency Hell

**Shell 3:** Run analysis-container.

**Shell 4:** Start a client that sends requests to process PDFs from the MinIO store and annotates these PDFs according to the service responses.
```bash
python scripts/mock_client.py
```
**Note**: Status 2025/01/09: the currently used `opentelemetry-exporter-otlp-proto-http` version `1.25.0` requires
a `protobuf` version < `5.x.x` and is not compatible with the latest protobuf version `5.27.x`. This is an [open issue](https://github.com/open-telemetry/opentelemetry-python/issues/3958) in opentelemetry, because [support for 4.25.x ends in Q2 '25](https://protobuf.dev/support/version-support/#python).
Therefore, we should keep this in mind and update the dependency once opentelemetry includes support for `protobuf 5.27.x`.
@@ -1,40 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.atlassian.bamboo</groupId>
        <artifactId>bamboo-specs-parent</artifactId>
        <version>7.1.2</version>
        <relativePath/>
    </parent>

    <artifactId>bamboo-specs</artifactId>
    <version>1.0.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <sonar.skip>true</sonar.skip>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.atlassian.bamboo</groupId>
            <artifactId>bamboo-specs-api</artifactId>
        </dependency>
        <dependency>
            <groupId>com.atlassian.bamboo</groupId>
            <artifactId>bamboo-specs</artifactId>
        </dependency>

        <!-- Test dependencies -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <!-- run 'mvn test' to perform offline validation of the plan -->
    <!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
</project>
@@ -1,179 +0,0 @@
package buildjob;

import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
import com.atlassian.bamboo.specs.api.builders.plan.Job;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
import com.atlassian.bamboo.specs.api.builders.project.Project;
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
import com.atlassian.bamboo.specs.builders.task.CleanWorkingDirectoryTask;
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
import com.atlassian.bamboo.specs.api.builders.Variable;
import com.atlassian.bamboo.specs.util.BambooServer;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;

/**
 * Plan configuration for Bamboo.
 * Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
 */
@BambooSpec
public class PlanSpec {

    private static final String SERVICE_NAME = "pyinfra";

    private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","");

    /**
     * Run main to publish plan on Bamboo
     */
    public static void main(final String[] args) throws Exception {
        //By default credentials are read from the '.credentials' file.
        BambooServer bambooServer = new BambooServer("http://localhost:8085");

        Plan plan = new PlanSpec().createDockerBuildPlan();
        bambooServer.publish(plan);
        PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
        bambooServer.publish(planPermission);
    }

    private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
        Permissions permission = new Permissions()
            .userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
            .groupPermissions("research", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
            .groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
            .groupPermissions("QA", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
            .loggedInUserPermissions(PermissionType.VIEW)
            .anonymousUserPermissionView();
        return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
    }

    private Project project() {
        return new Project()
            .name("RED")
            .key(new BambooKey("RED"));
    }

    public Plan createDockerBuildPlan() {
        return new Plan(
            project(),
            SERVICE_NAME, new BambooKey(SERVICE_KEY))
            .description("Docker build for pyinfra")
            .stages(
                new Stage("Build Stage")
                    .jobs(
                        new Job("Build Job", new BambooKey("BUILD"))
                            .tasks(
                                new CleanWorkingDirectoryTask()
                                    .description("Clean working directory.")
                                    .enabled(true),
                                new VcsCheckoutTask()
                                    .description("Checkout default repository.")
                                    .checkoutItems(new CheckoutItem().defaultRepository()),
                                new ScriptTask()
                                    .description("Set config and keys.")
                                    .inlineBody("mkdir -p ~/.ssh\n" +
                                        "echo \"${bamboo.bamboo_agent_ssh}\" | base64 -d >> ~/.ssh/id_rsa\n" +
                                        "echo \"host vector.iqser.com\" > ~/.ssh/config\n" +
                                        "echo \" user bamboo-agent\" >> ~/.ssh/config\n" +
                                        "chmod 600 ~/.ssh/config ~/.ssh/id_rsa"),
                                new ScriptTask()
                                    .description("Build Docker container.")
                                    .location(Location.FILE)
                                    .fileFromPath("bamboo-specs/src/main/resources/scripts/docker-build.sh")
                                    .argument(SERVICE_NAME))
                            .dockerConfiguration(
                                new DockerConfiguration()
                                    .image("nexus.iqser.com:5001/infra/release_build:4.2.0")
                                    .volume("/var/run/docker.sock", "/var/run/docker.sock"))),
                new Stage("Sonar Stage")
                    .jobs(
                        new Job("Sonar Job", new BambooKey("SONAR"))
                            .tasks(
                                new CleanWorkingDirectoryTask()
                                    .description("Clean working directory.")
                                    .enabled(true),
                                new VcsCheckoutTask()
                                    .description("Checkout default repository.")
                                    .checkoutItems(new CheckoutItem().defaultRepository()),
                                new ScriptTask()
                                    .description("Set config and keys.")
                                    .inlineBody("mkdir -p ~/.ssh\n" +
                                        "echo \"${bamboo.bamboo_agent_ssh}\" | base64 -d >> ~/.ssh/id_rsa\n" +
                                        "echo \"host vector.iqser.com\" > ~/.ssh/config\n" +
                                        "echo \" user bamboo-agent\" >> ~/.ssh/config\n" +
                                        "chmod 600 ~/.ssh/config ~/.ssh/id_rsa"),
                                new ScriptTask()
                                    .description("Run Sonarqube scan.")
                                    .location(Location.FILE)
                                    .fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-scan.sh")
                                    .argument(SERVICE_NAME),
                                new ScriptTask()
                                    .description("Shut down any running docker containers.")
                                    .location(Location.FILE)
                                    .inlineBody("pip install docker-compose\n" +
                                        "docker-compose down"))
                            .dockerConfiguration(
                                new DockerConfiguration()
                                    .image("nexus.iqser.com:5001/infra/release_build:4.2.0")
                                    .volume("/var/run/docker.sock", "/var/run/docker.sock"))),
                new Stage("Licence Stage")
                    .jobs(
                        new Job("Git Tag Job", new BambooKey("GITTAG"))
                            .tasks(
                                new VcsCheckoutTask()
                                    .description("Checkout default repository.")
                                    .checkoutItems(new CheckoutItem().defaultRepository()),
                                new ScriptTask()
                                    .description("Build git tag.")
                                    .location(Location.FILE)
                                    .fileFromPath("bamboo-specs/src/main/resources/scripts/git-tag.sh"),
                                new InjectVariablesTask()
                                    .description("Inject git tag.")
                                    .path("git.tag")
                                    .namespace("g")
                                    .scope(InjectVariablesScope.LOCAL),
                                new VcsTagTask()
                                    .description("${bamboo.g.gitTag}")
                                    .tagName("${bamboo.g.gitTag}")
                                    .defaultRepository())
                            .dockerConfiguration(
                                new DockerConfiguration()
                                    .image("nexus.iqser.com:5001/infra/release_build:4.4.1")),
                        new Job("Licence Job", new BambooKey("LICENCE"))
                            .enabled(false)
                            .tasks(
                                new VcsCheckoutTask()
                                    .description("Checkout default repository.")
                                    .checkoutItems(new CheckoutItem().defaultRepository()),
                                new ScriptTask()
                                    .description("Build licence.")
                                    .location(Location.FILE)
                                    .fileFromPath("bamboo-specs/src/main/resources/scripts/create-licence.sh"))
                            .dockerConfiguration(
                                new DockerConfiguration()
                                    .image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
                                    .volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
                                    .volume("/var/run/docker.sock", "/var/run/docker.sock"))))
            .linkedRepositories("RR / " + SERVICE_NAME)
            .triggers(new BitbucketServerTrigger())
            .planBranchManagement(new PlanBranchManagement()
                .createForVcsBranch()
                .delete(new BranchCleanup()
                    .whenInactiveInRepositoryAfterDays(14))
                .notificationForCommitters());
    }
}
@@ -1,19 +0,0 @@
#!/bin/bash
set -e

if [[ \"${bamboo_version_tag}\" != \"dev\" ]]
then
  ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
    -f ${bamboo_build_working_directory}/pom.xml \
    versions:set \
    -DnewVersion=${bamboo_version_tag}

  ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
    -f ${bamboo_build_working_directory}/pom.xml \
    -B clean deploy \
    -e -DdeployAtEnd=true \
    -Dmaven.wagon.http.ssl.insecure=true \
    -Dmaven.wagon.http.ssl.allowall=true \
    -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
    -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
fi
@@ -1,13 +0,0 @@
#!/bin/bash
set -e

SERVICE_NAME=$1

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag} .
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag}
@@ -1,9 +0,0 @@
#!/bin/bash
set -e

if [[ "${bamboo_version_tag}" = "dev" ]]
then
  echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
else
  echo "gitTag=${bamboo_version_tag}" > git.tag
fi
@@ -1,58 +0,0 @@
#!/bin/bash
set -e

export JAVA_HOME=/usr/bin/sonar-scanner/jre

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install dependency-check
python3 -m pip install docker-compose
python3 -m pip install coverage

echo "docker-compose down"
docker-compose down
sleep 30

echo "coverage report generation"
bash run_tests.sh

if [ ! -f reports/coverage.xml ]
then
  exit 1
fi

SERVICE_NAME=$1

echo "dependency-check:aggregate"
mkdir -p reports
dependency-check --enableExperimental -f JSON -f XML \
  --disableAssembly -s . -o reports --project $SERVICE_NAME --exclude ".git/**" --exclude "venv/**" \
  --exclude "build_venv/**" --exclude "**/__pycache__/**" --exclude "bamboo-specs/**"

if [[ -z "${bamboo_repository_pr_key}" ]]
then
  echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
  /usr/bin/sonar-scanner/bin/sonar-scanner -X\
    -Dsonar.projectKey=RED_$SERVICE_NAME \
    -Dsonar.host.url=https://sonarqube.iqser.com \
    -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
    -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
    -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
    -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
    -Dsonar.python.coverage.reportPaths=reports/coverage.xml

else
  echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
  /usr/bin/sonar-scanner/bin/sonar-scanner \
    -Dsonar.projectKey=RED_$SERVICE_NAME \
    -Dsonar.host.url=https://sonarqube.iqser.com \
    -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
    -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
    -Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
    -Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
    -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
    -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
    -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
    -Dsonar.python.coverage.reportPaths=reports/coverage.xml
fi
@@ -1,16 +0,0 @@
package buildjob;


import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
import org.junit.Test;

public class PlanSpecTest {
    @Test
    public void checkYourPlanOffline() throws PropertiesValidationException {
        Plan plan = new PlanSpec().createDockerBuildPlan();

        EntityPropertiesBuilders.build(plan);
    }
}
@@ -1,6 +0,0 @@
___ _ _ ___ __
o O O | _ \ | || | |_ _| _ _ / _| _ _ __ _
o | _/ \_, | | | | ' \ | _| | '_| / _` |
TS__[O] _|_|_ _|__/ |___| |_||_| _|_|_ _|_|_ \__,_|
{======|_| ``` |_| ````|_|`````|_|`````|_|`````|_|`````|_|`````|
./o--000' `-0-0-' `-0-0-' `-0-0-' `-0-0-' `-0-0-' `-0-0-' `-0-0-'
87 config.yaml
@@ -1,87 +0,0 @@
service:
  logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger
  name: $SERVICE_NAME|research # Default service name for research service, used for prometheus metric name
  response_formatter: default # formats analysis payloads of response messages
  upload_formatter: projecting # formats analysis payloads of objects uploaded to storage
  # Note: This is not really the right place for this. It should be configured on a per-service basis.
  operation: $OPERATION|default
  # operation needs to be specified in deployment config for services that are called without an operation specified
  operations:
    conversion:
      input:
        multi: False
        subdir: ""
        extension: ORIGIN.pdf.gz
      output:
        subdir: "pages_as_images"
        extension: json.gz
    extraction:
      input:
        multi: False
        subdir: ""
        extension: ORIGIN.pdf.gz
      output:
        subdir: "extracted_images"
        extension: json.gz
    table_parsing:
      input:
        multi: True
        subdir: "pages_as_images"
        extension: json.gz
      output:
        subdir: "table_parses"
        extension: json.gz
    image_classification:
      input:
        multi: True
        subdir: "extracted_images"
        extension: json.gz
      output:
        subdir: ""
        extension: IMAGE_INFO.json.gz
    default:
      input:
        multi: False
        subdir: ""
        extension: in.gz
      output:
        subdir: ""
        extension: out.gz

probing_webserver:
  host: $PROBING_WEBSERVER_HOST|"0.0.0.0" # Probe webserver address
  port: $PROBING_WEBSERVER_PORT|8080 # Probe webserver port
  mode: $PROBING_WEBSERVER_MODE|production # webserver mode: {development, production}

rabbitmq:
  host: $RABBITMQ_HOST|localhost # RabbitMQ host address
  port: $RABBITMQ_PORT|5672 # RabbitMQ host port
  user: $RABBITMQ_USERNAME|user # RabbitMQ username
  password: $RABBITMQ_PASSWORD|bitnami # RabbitMQ password
  heartbeat: $RABBITMQ_HEARTBEAT|7200 # Controls AMQP heartbeat timeout in seconds

queues:
  input: $REQUEST_QUEUE|request_queue # Requests to service
  output: $RESPONSE_QUEUE|response_queue # Responses by service
  dead_letter: $DEAD_LETTER_QUEUE|dead_letter_queue # Messages that failed to process

callback:
  analysis_endpoint: $ANALYSIS_ENDPOINT|"http://127.0.0.1:5000"

storage:
  backend: $STORAGE_BACKEND|s3 # The type of storage to use {s3, azure}
  bucket: "STORAGE_BUCKET_NAME|STORAGE_AZURECONTAINERNAME|pyinfra-test-bucket" # The bucket / container to pull files specified in queue requests from

  s3:
    endpoint: $STORAGE_ENDPOINT|"http://127.0.0.1:9000"
    access_key: $STORAGE_KEY|root
    secret_key: $STORAGE_SECRET|password
    region: $STORAGE_REGION|"eu-west-1"

  azure:
    connection_string: $STORAGE_AZURECONNECTIONSTRING|"DefaultEndpointsProtocol=https;AccountName=iqserdevelopment;AccountKey=4imAbV9PYXaztSOMpIyAClg88bAZCXuXMGJG0GA1eIBpdh2PlnFGoRBnKqLy2YZUSTmZ3wJfC7tzfHtuC6FEhQ==;EndpointSuffix=core.windows.net"

retry:
  tries: 3
  delay: 5
  jitter: [1, 3]
@@ -1,76 +0,0 @@
Processing service interface

image classification now    : JSON (Mdat PDF) -> (Data PDF -> JSON [Mdat ImObj]
image classification future : JSON [Mdat FunkIm] | Mdat PDF -> (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection            : JSON [Mdat PagIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat SemIm]])
NER                         : JSON [Mdat Dict] -> (Data [Dict] -> JSON [Mdat])
table parsing               : JSON [Mdat FunkIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
pdf2image                   : Mdat (fn, [Int], PDF) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])


image classification now    : Mdat (fn, [Int], file) -> (Data PDF -> JSON [Mdat ImObj]
image classification future : Mdat (fn, [Int], dir) -> (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection            : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing               : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER                         : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image                   : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])


from funcy import identity

access(mdat):
    if mdat.path is file:
        request = {"data": load(mdat.path), "metadata": mdat}
    elif mdat.path is dir:
        get_indexed = identity if not mdat.idx else itemgetter(*mdat.idx)
        request = {"data": get_indexed(get_files(mdat.path)), "metadata": mdat}
    else:
        raise BadRequest


storage:

fileId: {
    pages: [PagIm]
    images: [FunkIm]
    sections: gz
}


---------------


assert if targetPath is file then response list must be singleton
{index: [], dir: fileID.pdf.gz, targetPath: fileID.images.json.gz} -> [{data: pdf bytes, metadata: request: ...] -> [{data: null, metadata: request: null, response: {classification infos: ...}]
image classification now    : Mdat (fn, [Int], file) -> [JSON (Data PDF, Mdat)] -> [JSON (Data null, Mdat [ImObj])] | 1 -> 1
assert if targetPath is file then response list must be singleton
{index: [], dir: fileID/images, targetPath: fileID.images.json.gz} -> [{data: image bytes, metadata: request: {image location...}] -> [{data: null, metadata: request: null, response: {classification infos: ...}]
image classification future : Mdat (fn, [Int], dir) -> JSON (Data [FunkIm], Mdat) -> [JSON (Data null, Mdat [FunkIm])] |
object detection            : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing               : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER                         : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image                   : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])

aggregate <==> targetpath is file and index is empty
@@ -1,32 +0,0 @@
version: '2'
services:
  minio:
    image: minio/minio:RELEASE.2022-06-11T19-55-32Z
    ports:
      - "9000:9000"
    environment:
      - MINIO_ROOT_PASSWORD=password
      - MINIO_ROOT_USER=root
    volumes:
      - ./data/minio_store:/data
    command: server /data
    network_mode: "bridge"
  rabbitmq:
    image: docker.io/bitnami/rabbitmq:3.9.8
    ports:
      - '4369:4369'
      - '5551:5551'
      - '5552:5552'
      - '5672:5672'
      - '25672:25672'
      - '15672:15672'
    environment:
      - RABBITMQ_SECURE_PASSWORD=yes
      - RABBITMQ_VM_MEMORY_HIGH_WATERMARK=100%
      - RABBITMQ_DISK_FREE_ABSOLUTE_LIMIT=20Gi
    network_mode: "bridge"
    volumes:
      - /opt/bitnami/rabbitmq/.rabbitmq/:/data/bitnami
volumes:
  mdata:
6802 poetry.lock (generated, new file)
File diff suppressed because it is too large
@@ -0,0 +1 @@
@ -1,65 +0,0 @@
|
||||
import logging
|
||||
|
||||
from funcy import merge, omit, lmap
|
||||
|
||||
from pyinfra.exceptions import AnalysisFailure
|
||||
from pyinfra.pipeline_factory import CachedPipelineFactory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Callback:
|
||||
"""This is the callback that is applied to items pulled from the storage. It forwards these items to an analysis
|
||||
endpoint.
|
||||
"""
|
||||
|
||||
def __init__(self, pipeline_factory: CachedPipelineFactory):
|
||||
self.pipeline_factory = pipeline_factory
|
||||
|
||||
def __get_pipeline(self, endpoint):
|
||||
return self.pipeline_factory.get_pipeline(endpoint)
|
||||
|
||||
@staticmethod
|
||||
def __run_pipeline(pipeline, analysis_input: dict):
|
||||
"""
|
||||
TODO: Since data and metadata are passed as singletons, there is no buffering and hence no batching happening
|
||||
within the pipeline. However, the queue acknowledgment logic needs to be changed in order to facilitate
|
||||
passing non-singletons, to only ack a message, once a response is pulled from the output queue of the
|
||||
pipeline. Probably the pipeline return value needs to contain the queue message frame (or so), in order for
|
||||
the queue manager to tell which message to ack.
|
||||
|
||||
TODO: casting list (lmap) on `analysis_response_stream` is a temporary solution, while the client pipeline
|
||||
operates on singletons ([data], [metadata]).
|
||||
"""
|
||||
|
||||
def combine_storage_item_metadata_with_queue_message_metadata(analysis_input):
|
||||
return merge(analysis_input["metadata"], omit(analysis_input, ["data", "metadata"]))
|
||||
|
||||
def remove_queue_message_metadata(analysis_result):
|
||||
metadata = omit(analysis_result["metadata"], queue_message_keys(analysis_input))
|
||||
return {**analysis_result, "metadata": metadata}
|
||||
|
||||
def queue_message_keys(analysis_input):
|
||||
return {*analysis_input.keys()}.difference({"data", "metadata"})
|
||||
|
||||
try:
|
||||
data = analysis_input["data"]
|
||||
metadata = combine_storage_item_metadata_with_queue_message_metadata(analysis_input)
|
||||
analysis_response_stream = pipeline([data], [metadata])
|
||||
analysis_response_stream = lmap(remove_queue_message_metadata, analysis_response_stream)
|
||||
return analysis_response_stream
|
||||
|
||||
except Exception as err:
|
||||
logger.error(err)
|
||||
raise AnalysisFailure from err
|
||||
|
||||
def __call__(self, analysis_input: dict):
|
||||
"""data_metadata_pack: {'dossierId': ..., 'fileId': ..., 'pages': ..., 'operation': ...}"""
|
||||
operation = analysis_input.get("operation", "")
|
||||
pipeline = self.__get_pipeline(operation)
|
||||
|
||||
try:
|
||||
logging.debug(f"Requesting analysis for operation '{operation}'...")
|
||||
return self.__run_pipeline(pipeline, analysis_input)
|
||||
except AnalysisFailure:
|
||||
logging.warning(f"Exception caught when calling analysis endpoint for operation '{operation}'.")
|
||||
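
A reading aid for the merge/omit bookkeeping in __run_pipeline above: a standalone sketch, with a made-up payload, of how the queue-message keys are folded into the request metadata and stripped from the response again.

from funcy import merge, omit

analysis_input = {
    "data": b"%PDF-...",
    "metadata": {"id": 3},
    # everything that is neither "data" nor "metadata" came from the queue message
    "dossierId": "d-1",
    "fileId": "f-1",
    "operation": "pdf2image",
}

queue_keys = set(analysis_input) - {"data", "metadata"}

# request side: queue message metadata is merged into the storage item metadata
request_metadata = merge(analysis_input["metadata"], omit(analysis_input, ["data", "metadata"]))
assert request_metadata == {"id": 3, "dossierId": "d-1", "fileId": "f-1", "operation": "pdf2image"}

# response side: the same keys are removed again before the result is returned
analysis_result = {"data": None, "metadata": {**request_metadata, "response": "..."}}
cleaned = {**analysis_result, "metadata": omit(analysis_result["metadata"], queue_keys)}
assert set(cleaned["metadata"]) == {"id", "response"}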
@ -1,120 +0,0 @@
|
||||
import logging
|
||||
from functools import lru_cache
|
||||
|
||||
from funcy import project, identity, rcompose
|
||||
|
||||
from pyinfra.callback import Callback
|
||||
from pyinfra.config import parse_disjunction_string
|
||||
from pyinfra.file_descriptor_builder import RedFileDescriptorBuilder
|
||||
from pyinfra.file_descriptor_manager import FileDescriptorManager
|
||||
from pyinfra.pipeline_factory import CachedPipelineFactory
|
||||
from pyinfra.queue.consumer import Consumer
|
||||
from pyinfra.queue.queue_manager.pika_queue_manager import PikaQueueManager
|
||||
from pyinfra.server.client_pipeline import ClientPipeline
|
||||
from pyinfra.server.dispatcher.dispatchers.rest import RestDispatcher
|
||||
from pyinfra.server.interpreter.interpreters.rest_callback import RestPickupStreamer
|
||||
from pyinfra.server.packer.packers.rest import RestPacker
|
||||
from pyinfra.server.receiver.receivers.rest import RestReceiver
|
||||
from pyinfra.storage import storages
|
||||
from pyinfra.visitor import QueueVisitor
|
||||
from pyinfra.visitor.downloader import Downloader
|
||||
from pyinfra.visitor.response_formatter.formatters.default import DefaultResponseFormatter
|
||||
from pyinfra.visitor.response_formatter.formatters.identity import IdentityResponseFormatter
|
||||
from pyinfra.visitor.strategies.response.aggregation import AggregationStorageStrategy, ProjectingUploadFormatter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ComponentFactory:
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_consumer(self, callback=None):
|
||||
callback = callback or self.get_callback()
|
||||
return Consumer(self.get_visitor(callback), self.get_queue_manager())
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_callback(self, analysis_base_url=None):
|
||||
analysis_base_url = analysis_base_url or self.config.rabbitmq.callback.analysis_endpoint
|
||||
|
||||
callback = Callback(CachedPipelineFactory(base_url=analysis_base_url, pipeline_factory=self.get_pipeline))
|
||||
|
||||
def wrapped(body):
|
||||
body_repr = project(body, ["dossierId", "fileId", "operation"])
|
||||
logger.info(f"Processing {body_repr}...")
|
||||
result = callback(body)
|
||||
logger.info(f"Completed processing {body_repr}...")
|
||||
return result
|
||||
|
||||
return wrapped
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_visitor(self, callback):
|
||||
return QueueVisitor(
|
||||
callback=callback,
|
||||
data_loader=self.get_downloader(),
|
||||
response_strategy=self.get_response_strategy(),
|
||||
response_formatter=self.get_response_formatter(),
|
||||
)
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_queue_manager(self):
|
||||
return PikaQueueManager(self.config.rabbitmq.queues.input, self.config.rabbitmq.queues.output)
|
||||
|
||||
@staticmethod
|
||||
@lru_cache(maxsize=None)
|
||||
def get_pipeline(endpoint):
|
||||
return ClientPipeline(
|
||||
RestPacker(), RestDispatcher(endpoint), RestReceiver(), rcompose(RestPickupStreamer(), RestReceiver())
|
||||
)
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_storage(self):
|
||||
return storages.get_storage(self.config.storage.backend)
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_response_strategy(self, storage=None):
|
||||
return AggregationStorageStrategy(
|
||||
storage=storage or self.get_storage(),
|
||||
file_descriptor_manager=self.get_file_descriptor_manager(),
|
||||
upload_formatter=self.get_upload_formatter(),
|
||||
)
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_file_descriptor_manager(self):
|
||||
return FileDescriptorManager(
|
||||
bucket_name=parse_disjunction_string(self.config.storage.bucket),
|
||||
file_descriptor_builder=self.get_operation_file_descriptor_builder(),
|
||||
)
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_upload_formatter(self):
|
||||
return {"identity": identity, "projecting": ProjectingUploadFormatter()}[self.config.service.upload_formatter]
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_operation_file_descriptor_builder(self):
|
||||
return RedFileDescriptorBuilder(
|
||||
operation2file_patterns=self.get_operation2file_patterns(),
|
||||
default_operation_name=self.config.service.operation,
|
||||
)
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_response_formatter(self):
|
||||
return {"default": DefaultResponseFormatter(), "identity": IdentityResponseFormatter()}[
|
||||
self.config.service.response_formatter
|
||||
]
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_operation2file_patterns(self):
|
||||
if self.config.service.operation != "default":
|
||||
self.config.service.operations["default"] = self.config.service.operations[self.config.service.operation]
|
||||
return self.config.service.operations
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_downloader(self, storage=None):
|
||||
return Downloader(
|
||||
storage=storage or self.get_storage(),
|
||||
bucket_name=parse_disjunction_string(self.config.storage.bucket),
|
||||
file_descriptor_manager=self.get_file_descriptor_manager(),
|
||||
)
|
||||
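
For orientation, the factory above was typically driven in three lines (a sketch, assuming the module-level CONFIG object and that consume_and_publish is the desired entry point):

from pyinfra.component_factory import ComponentFactory
from pyinfra.config import CONFIG

factory = ComponentFactory(CONFIG)
consumer = factory.get_consumer()   # wires callback, visitor and queue manager together
consumer.consume_and_publish()      # pulls messages, runs the callback, publishes responses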
@ -1,84 +0,0 @@
|
||||
"""Implements a config object with dot-indexing syntax."""
|
||||
import os
|
||||
from functools import partial
|
||||
from itertools import chain
|
||||
from operator import truth
|
||||
from typing import Iterable
|
||||
|
||||
from envyaml import EnvYAML
|
||||
from frozendict import frozendict
|
||||
from funcy import first, juxt, butlast, last, lmap
|
||||
|
||||
from pyinfra.locations import CONFIG_FILE
|
||||
|
||||
|
||||
def _get_item_and_maybe_make_dotindexable(container, item):
|
||||
ret = container[item]
|
||||
return DotIndexable(ret) if isinstance(ret, dict) else ret
|
||||
|
||||
|
||||
class DotIndexable:
|
||||
def __init__(self, x):
|
||||
self.x = x
|
||||
|
||||
def __getattr__(self, item):
|
||||
return _get_item_and_maybe_make_dotindexable(self.x, item)
|
||||
|
||||
def __repr__(self):
|
||||
return self.x.__repr__()
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self.__getattr__(item)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self.x[key] = value
|
||||
|
||||
|
||||
class Config:
|
||||
def __init__(self, config_path):
|
||||
self.__config = EnvYAML(config_path)
|
||||
|
||||
def __getattr__(self, item):
|
||||
if item in self.__config:
|
||||
return _get_item_and_maybe_make_dotindexable(self.__config, item)
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self.__getattr__(item)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self.__config[key] = value
|
||||
|
||||
def to_dict(self, frozen=True):
|
||||
return to_dict(self.__config.export(), frozen=frozen)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.to_dict())
|
||||
|
||||
|
||||
def to_dict(v, frozen=True):
|
||||
def make_dict(*args, **kwargs):
|
||||
return (frozendict if frozen else dict)(*args, **kwargs)
|
||||
|
||||
if isinstance(v, list):
|
||||
return tuple(map(partial(to_dict, frozen=frozen), v))
|
||||
elif isinstance(v, DotIndexable):
|
||||
return make_dict({k: to_dict(v, frozen=frozen) for k, v in v.x.items()})
|
||||
elif isinstance(v, dict):
|
||||
return make_dict({k: to_dict(v, frozen=frozen) for k, v in v.items()})
|
||||
else:
|
||||
return v
|
||||
|
||||
|
||||
CONFIG = Config(CONFIG_FILE)
|
||||
|
||||
|
||||
def parse_disjunction_string(disjunction_string):
|
||||
def try_parse_env_var(disjunction_string):
|
||||
try:
|
||||
return os.environ[disjunction_string]
|
||||
except KeyError:
|
||||
return None
|
||||
|
||||
options = disjunction_string.split("|")
|
||||
identifiers, fallback_value = juxt(butlast, last)(options)
|
||||
return first(chain(filter(truth, map(try_parse_env_var, identifiers)), [fallback_value]))
|
||||
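
The disjunction syntax is easiest to see with an example: every "|"-separated identifier is tried as an environment variable, and the last element is the literal fallback (the variable and bucket names here are made up):

import os

from pyinfra.config import parse_disjunction_string

os.environ.pop("BUCKET_NAME", None)
assert parse_disjunction_string("BUCKET_NAME|my-default-bucket") == "my-default-bucket"

os.environ["BUCKET_NAME"] = "tenant-bucket"
assert parse_disjunction_string("BUCKET_NAME|my-default-bucket") == "tenant-bucket"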
133 pyinfra/config/loader.py (Normal file)
@ -0,0 +1,133 @@
|
||||
import argparse
|
||||
import os
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
from dynaconf import Dynaconf, ValidationError, Validator
|
||||
from funcy import lflatten
|
||||
from kn_utils.logging import logger
|
||||
|
||||
# This path is meant for testing purposes and convenience. It probably won't reflect the actual root path when pyinfra is
# installed as a package, so don't use it in production code, but define your own root path as described in load config.
|
||||
local_pyinfra_root_path = Path(__file__).parents[2]
|
||||
|
||||
|
||||
def load_settings(
|
||||
settings_path: Union[str, Path, list] = "config/",
|
||||
root_path: Union[str, Path] = None,
|
||||
validators: list[Validator] = None,
|
||||
):
|
||||
"""Load settings from .toml files, .env and environment variables. Also ensures a ROOT_PATH environment variable is
|
||||
set. If ROOT_PATH is not set and no root_path argument is passed, the current working directory is used as root.
|
||||
Settings paths can be a single .toml file, a folder containing .toml files or a list of .toml files and folders.
|
||||
If a ROOT_PATH environment variable is set, it is not overwritten by the root_path argument.
|
||||
If a folder is passed, all .toml files in the folder are loaded. If settings path is None, only .env and
|
||||
environment variables are loaded. If settings_path are relative paths, they are joined with the root_path argument.
|
||||
"""
|
||||
|
||||
root_path = get_or_set_root_path(root_path)
|
||||
validators = validators or get_pyinfra_validators()
|
||||
|
||||
settings_files = normalize_to_settings_files(settings_path, root_path)
|
||||
|
||||
settings = Dynaconf(
|
||||
load_dotenv=True,
|
||||
envvar_prefix=False,
|
||||
settings_files=settings_files,
|
||||
)
|
||||
|
||||
validate_settings(settings, validators)
|
||||
logger.info("Settings loaded and validated.")
|
||||
|
||||
return settings
|
||||
|
||||
|
||||
def normalize_to_settings_files(settings_path: Union[str, Path, list], root_path: Union[str, Path]):
|
||||
if settings_path is None:
|
||||
logger.info("No settings path specified, only loading .env end ENVs.")
|
||||
settings_files = []
|
||||
elif isinstance(settings_path, str) or isinstance(settings_path, Path):
|
||||
settings_files = [settings_path]
|
||||
elif isinstance(settings_path, list):
|
||||
settings_files = settings_path
|
||||
else:
|
||||
raise ValueError(f"Invalid settings path: {settings_path=}")
|
||||
|
||||
settings_files = lflatten(map(partial(_normalize_and_verify, root_path=root_path), settings_files))
|
||||
logger.debug(f"Normalized settings files: {settings_files}")
|
||||
|
||||
return settings_files
|
||||
|
||||
|
||||
def _normalize_and_verify(settings_path: Path, root_path: Path):
|
||||
settings_path = Path(settings_path)
|
||||
root_path = Path(root_path)
|
||||
|
||||
if not settings_path.is_absolute():
|
||||
logger.debug(f"Settings path is not absolute, joining with root path: {root_path}")
|
||||
settings_path = root_path / settings_path
|
||||
|
||||
if settings_path.is_dir():
|
||||
logger.debug(f"Settings path is a directory, loading all .toml files in the directory: {settings_path}")
|
||||
settings_files = list(settings_path.glob("*.toml"))
|
||||
elif settings_path.is_file():
|
||||
logger.debug(f"Settings path is a file, loading specified file: {settings_path}")
|
||||
settings_files = [settings_path]
|
||||
else:
|
||||
raise ValueError(f"Invalid settings path: {settings_path=}, {root_path=}")
|
||||
|
||||
return settings_files
|
||||
|
||||
|
||||
def get_or_set_root_path(root_path: Union[str, Path] = None):
|
||||
env_root_path = os.environ.get("ROOT_PATH")
|
||||
|
||||
if env_root_path:
|
||||
root_path = env_root_path
|
||||
logger.debug(f"'ROOT_PATH' environment variable is set to {root_path}.")
|
||||
|
||||
elif root_path:
|
||||
logger.info(f"'ROOT_PATH' environment variable is not set, setting to {root_path}.")
|
||||
os.environ["ROOT_PATH"] = str(root_path)
|
||||
|
||||
else:
|
||||
root_path = Path.cwd()
|
||||
logger.info(f"'ROOT_PATH' environment variable is not set, defaulting to working directory {root_path}.")
|
||||
os.environ["ROOT_PATH"] = str(root_path)
|
||||
|
||||
return root_path
|
||||
|
||||
|
||||
def get_pyinfra_validators():
|
||||
import pyinfra.config.validators
|
||||
|
||||
return lflatten(
|
||||
validator for validator in pyinfra.config.validators.__dict__.values() if isinstance(validator, list)
|
||||
)
|
||||
|
||||
|
||||
def validate_settings(settings: Dynaconf, validators):
|
||||
settings_valid = True
|
||||
|
||||
for validator in validators:
|
||||
try:
|
||||
validator.validate(settings)
|
||||
except ValidationError as e:
|
||||
settings_valid = False
|
||||
logger.warning(e)
|
||||
|
||||
if not settings_valid:
|
||||
raise ValidationError("Settings validation failed.")
|
||||
|
||||
logger.debug("Settings validated.")
|
||||
|
||||
|
||||
def parse_settings_path():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"settings_path",
|
||||
help="Path to settings file(s) or folder(s). Must be .toml file(s) or a folder(s) containing .toml files.",
|
||||
nargs="+",
|
||||
)
|
||||
return parser.parse_args().settings_path
|
||||
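
A typical entry point wires the argument parser and the loader together (a sketch; the service module name in the comment is hypothetical):

# run as: python -m my_service config/
from pyinfra.config.loader import load_settings, parse_settings_path

if __name__ == "__main__":
    settings_path = parse_settings_path()     # one or more .toml files or folders
    settings = load_settings(settings_path)   # also loads .env and environment variables
    print(settings.rabbitmq.host)             # dot access via Dynaconf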
57 pyinfra/config/validators.py (Normal file)
@ -0,0 +1,57 @@
|
||||
from dynaconf import Validator
|
||||
|
||||
queue_manager_validators = [
|
||||
Validator("rabbitmq.host", must_exist=True, is_type_of=str),
|
||||
Validator("rabbitmq.port", must_exist=True, is_type_of=int),
|
||||
Validator("rabbitmq.username", must_exist=True, is_type_of=str),
|
||||
Validator("rabbitmq.password", must_exist=True, is_type_of=str),
|
||||
Validator("rabbitmq.heartbeat", must_exist=True, is_type_of=int),
|
||||
Validator("rabbitmq.connection_sleep", must_exist=True, is_type_of=int),
|
||||
Validator("rabbitmq.input_queue", must_exist=True, is_type_of=str),
|
||||
Validator("rabbitmq.output_queue", must_exist=True, is_type_of=str),
|
||||
Validator("rabbitmq.dead_letter_queue", must_exist=True, is_type_of=str),
|
||||
]
|
||||
|
||||
azure_storage_validators = [
|
||||
Validator("storage.azure.connection_string", must_exist=True, is_type_of=str),
|
||||
Validator("storage.azure.container", must_exist=True, is_type_of=str),
|
||||
]
|
||||
|
||||
s3_storage_validators = [
|
||||
Validator("storage.s3.endpoint", must_exist=True, is_type_of=str),
|
||||
Validator("storage.s3.key", must_exist=True, is_type_of=str),
|
||||
Validator("storage.s3.secret", must_exist=True, is_type_of=str),
|
||||
Validator("storage.s3.region", must_exist=True, is_type_of=str),
|
||||
Validator("storage.s3.bucket", must_exist=True, is_type_of=str),
|
||||
]
|
||||
|
||||
storage_validators = [
|
||||
Validator("storage.backend", must_exist=True, is_type_of=str),
|
||||
]
|
||||
|
||||
multi_tenant_storage_validators = [
|
||||
Validator("storage.tenant_server.endpoint", must_exist=True, is_type_of=str),
|
||||
Validator("storage.tenant_server.public_key", must_exist=True, is_type_of=str),
|
||||
]
|
||||
|
||||
|
||||
prometheus_validators = [
|
||||
Validator("metrics.prometheus.prefix", must_exist=True, is_type_of=str),
|
||||
Validator("metrics.prometheus.enabled", must_exist=True, is_type_of=bool),
|
||||
]
|
||||
|
||||
webserver_validators = [
|
||||
Validator("webserver.host", must_exist=True, is_type_of=str),
|
||||
Validator("webserver.port", must_exist=True, is_type_of=int),
|
||||
]
|
||||
|
||||
tracing_validators = [
|
||||
Validator("tracing.enabled", must_exist=True, is_type_of=bool),
|
||||
Validator("tracing.type", must_exist=True, is_type_of=str)
|
||||
]
|
||||
|
||||
opentelemetry_validators = [
|
||||
Validator("tracing.opentelemetry.endpoint", must_exist=True, is_type_of=str),
|
||||
Validator("tracing.opentelemetry.service_name", must_exist=True, is_type_of=str),
|
||||
Validator("tracing.opentelemetry.exporter", must_exist=True, is_type_of=str)
|
||||
]
|
||||
@ -1,8 +0,0 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from pyinfra.component_factory import ComponentFactory
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_component_factory(config):
|
||||
return ComponentFactory(config)
|
||||
169 pyinfra/examples.py (Normal file)
@ -0,0 +1,169 @@
|
||||
import asyncio
|
||||
import signal
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from aiormq.exceptions import AMQPConnectionError
|
||||
from dynaconf import Dynaconf
|
||||
from fastapi import FastAPI
|
||||
from kn_utils.logging import logger
|
||||
|
||||
from pyinfra.config.loader import get_pyinfra_validators, validate_settings
|
||||
from pyinfra.queue.async_manager import AsyncQueueManager, RabbitMQConfig
|
||||
from pyinfra.queue.callback import Callback
|
||||
from pyinfra.queue.manager import QueueManager
|
||||
from pyinfra.utils.opentelemetry import instrument_app, instrument_pika, setup_trace
|
||||
from pyinfra.webserver.prometheus import (
|
||||
add_prometheus_endpoint,
|
||||
make_prometheus_processing_time_decorator_from_settings,
|
||||
)
|
||||
from pyinfra.webserver.utils import (
|
||||
add_health_check_endpoint,
|
||||
create_webserver_thread_from_settings,
|
||||
run_async_webserver,
|
||||
)
|
||||
|
||||
shutdown_flag = False
|
||||
|
||||
|
||||
async def graceful_shutdown(manager: AsyncQueueManager, queue_task, webserver_task):
|
||||
global shutdown_flag
|
||||
shutdown_flag = True
|
||||
logger.info("SIGTERM received, shutting down gracefully...")
|
||||
|
||||
if queue_task and not queue_task.done():
|
||||
queue_task.cancel()
|
||||
|
||||
# await queue manager shutdown
|
||||
await asyncio.gather(queue_task, manager.shutdown(), return_exceptions=True)
|
||||
|
||||
if webserver_task and not webserver_task.done():
|
||||
webserver_task.cancel()
|
||||
|
||||
# await webserver shutdown
|
||||
await asyncio.gather(webserver_task, return_exceptions=True)
|
||||
|
||||
logger.info("Shutdown complete.")
|
||||
|
||||
|
||||
async def run_async_queues(manager: AsyncQueueManager, app, port, host):
|
||||
"""Run the async webserver and the async queue manager concurrently."""
|
||||
queue_task = None
|
||||
webserver_task = None
|
||||
tenant_api_available = True
|
||||
|
||||
# add signal handler for SIGTERM and SIGINT
|
||||
loop = asyncio.get_running_loop()
|
||||
loop.add_signal_handler(
|
||||
signal.SIGTERM, lambda: asyncio.create_task(graceful_shutdown(manager, queue_task, webserver_task))
|
||||
)
|
||||
loop.add_signal_handler(
|
||||
signal.SIGINT, lambda: asyncio.create_task(graceful_shutdown(manager, queue_task, webserver_task))
|
||||
)
|
||||
|
||||
try:
|
||||
active_tenants = await manager.fetch_active_tenants()
|
||||
|
||||
queue_task = asyncio.create_task(manager.run(active_tenants=active_tenants), name="queues")
|
||||
webserver_task = asyncio.create_task(run_async_webserver(app, port, host), name="webserver")
|
||||
await asyncio.gather(queue_task, webserver_task)
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Main task was cancelled, initiating shutdown.")
|
||||
except AMQPConnectionError as e:
|
||||
logger.warning(f"AMQPConnectionError: {e} - shutting down.")
|
||||
except (aiohttp.ClientResponseError, aiohttp.ClientConnectorError):
|
||||
logger.warning("Tenant server did not answer - shutting down.")
|
||||
tenant_api_available = False
|
||||
except Exception as e:
|
||||
logger.error(f"An error occurred while running async queues: {e}", exc_info=True)
|
||||
sys.exit(1)
|
||||
finally:
|
||||
if shutdown_flag:
|
||||
logger.debug("Graceful shutdown already in progress.")
|
||||
else:
|
||||
logger.warning("Initiating shutdown due to error or manual interruption.")
|
||||
if not tenant_api_available:
|
||||
sys.exit(0)
|
||||
if queue_task and not queue_task.done():
|
||||
queue_task.cancel()
|
||||
|
||||
if webserver_task and not webserver_task.done():
|
||||
webserver_task.cancel()
|
||||
|
||||
await asyncio.gather(queue_task, manager.shutdown(), webserver_task, return_exceptions=True)
|
||||
logger.info("Shutdown complete.")
|
||||
|
||||
|
||||
def start_standard_queue_consumer(
|
||||
callback: Callback,
|
||||
settings: Dynaconf,
|
||||
app: FastAPI = None,
|
||||
):
|
||||
"""Default serving logic for research services.
|
||||
|
||||
Supplies /health, /ready and /prometheus endpoints (if enabled). The callback is monitored for processing time per
|
||||
message. Also traces the queue messages via openTelemetry (if enabled).
|
||||
Workload is received via queue messages and processed by the callback function (see pyinfra.queue.callback for
|
||||
callbacks).
|
||||
"""
|
||||
validate_settings(settings, get_pyinfra_validators())
|
||||
|
||||
logger.info("Starting webserver and queue consumer...")
|
||||
|
||||
app = app or FastAPI()
|
||||
|
||||
if settings.metrics.prometheus.enabled:
|
||||
logger.info("Prometheus metrics enabled.")
|
||||
app = add_prometheus_endpoint(app)
|
||||
callback = make_prometheus_processing_time_decorator_from_settings(settings)(callback)
|
||||
|
||||
if settings.tracing.enabled:
|
||||
setup_trace(settings)
|
||||
|
||||
instrument_pika(dynamic_queues=settings.dynamic_tenant_queues.enabled)
|
||||
instrument_app(app)
|
||||
|
||||
if settings.dynamic_tenant_queues.enabled:
|
||||
logger.info("Dynamic tenant queues enabled. Running async queues.")
|
||||
config = RabbitMQConfig(
|
||||
host=settings.rabbitmq.host,
|
||||
port=settings.rabbitmq.port,
|
||||
username=settings.rabbitmq.username,
|
||||
password=settings.rabbitmq.password,
|
||||
heartbeat=settings.rabbitmq.heartbeat,
|
||||
input_queue_prefix=settings.rabbitmq.service_request_queue_prefix,
|
||||
tenant_event_queue_suffix=settings.rabbitmq.tenant_event_queue_suffix,
|
||||
tenant_exchange_name=settings.rabbitmq.tenant_exchange_name,
|
||||
service_request_exchange_name=settings.rabbitmq.service_request_exchange_name,
|
||||
service_response_exchange_name=settings.rabbitmq.service_response_exchange_name,
|
||||
service_dead_letter_queue_name=settings.rabbitmq.service_dlq_name,
|
||||
queue_expiration_time=settings.rabbitmq.queue_expiration_time,
|
||||
pod_name=settings.kubernetes.pod_name,
|
||||
)
|
||||
manager = AsyncQueueManager(
|
||||
config=config,
|
||||
tenant_service_url=settings.storage.tenant_server.endpoint,
|
||||
message_processor=callback,
|
||||
max_concurrent_tasks=(
|
||||
settings.asyncio.max_concurrent_tasks if hasattr(settings.asyncio, "max_concurrent_tasks") else 10
|
||||
),
|
||||
)
|
||||
else:
|
||||
logger.info("Dynamic tenant queues disabled. Running sync queues.")
|
||||
manager = QueueManager(settings)
|
||||
|
||||
app = add_health_check_endpoint(app, manager.is_ready)
|
||||
|
||||
if isinstance(manager, AsyncQueueManager):
|
||||
asyncio.run(run_async_queues(manager, app, port=settings.webserver.port, host=settings.webserver.host))
|
||||
|
||||
elif isinstance(manager, QueueManager):
|
||||
webserver = create_webserver_thread_from_settings(app, settings)
|
||||
webserver.start()
|
||||
try:
|
||||
manager.start_consuming(callback)
|
||||
except Exception as e:
|
||||
logger.error(f"An error occurred while consuming messages: {e}", exc_info=True)
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.warning(f"Behavior for type {type(manager)} is not defined")
|
||||
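
Putting the serving entry point together, a service built on pyinfra would roughly wire its callback and settings like this (a sketch; process_document is a hypothetical data processor, and the callback is built with the helper from pyinfra.queue.callback shown further down):

from pyinfra.config.loader import load_settings
from pyinfra.examples import start_standard_queue_consumer
from pyinfra.queue.callback import make_download_process_upload_callback


def process_document(data, message):
    # hypothetical analysis step: must return something JSON-serializable
    return {"ok": True, "fileId": message.get("fileId")}


if __name__ == "__main__":
    settings = load_settings("config/")
    callback = make_download_process_upload_callback(process_document, settings)
    start_standard_queue_consumer(callback, settings)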
@ -1,50 +0,0 @@
|
||||
class AnalysisFailure(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DataLoadingFailure(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ProcessingFailure(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class UnknownStorageBackend(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidEndpoint(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class UnknownClient(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class ConsumerError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class NoSuchContainer(KeyError):
|
||||
pass
|
||||
|
||||
|
||||
class IntentionalTestException(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class UnexpectedItemType(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class NoBufferCapacity(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidMessage(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidStorageItemFormat(ValueError):
|
||||
pass
|
||||
@ -1,99 +0,0 @@
|
||||
import abc
|
||||
import os
|
||||
from operator import itemgetter
|
||||
|
||||
from funcy import project
|
||||
|
||||
|
||||
class FileDescriptorBuilder:
|
||||
@abc.abstractmethod
|
||||
def build_file_descriptor(self, queue_item_body, end="input"):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def build_matcher(self, file_descriptor):
|
||||
raise NotImplementedError
|
||||
|
||||
@staticmethod
|
||||
@abc.abstractmethod
|
||||
def build_storage_upload_info(analysis_payload, request_metadata):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_path_prefix(self, queue_item_body):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class RedFileDescriptorBuilder(FileDescriptorBuilder):
|
||||
"""Defines concrete descriptors for storage objects based on queue messages"""
|
||||
|
||||
def __init__(self, operation2file_patterns, default_operation_name):
|
||||
|
||||
self.operation2file_patterns = operation2file_patterns or self.get_default_operation2file_patterns()
|
||||
self.default_operation_name = default_operation_name
|
||||
|
||||
@staticmethod
|
||||
def get_default_operation2file_patterns():
|
||||
return {"default": {"input": {"subdir": "", "extension": ".in"}, "output": {"subdir": "", "extension": ".out"}}}
|
||||
|
||||
def build_file_descriptor(self, queue_item_body, end="input"):
|
||||
|
||||
def pages():
|
||||
if end == "input":
|
||||
if "id" in queue_item_body:
|
||||
return [queue_item_body["id"]]
|
||||
else:
|
||||
return queue_item_body["pages"] if file_pattern["multi"] else []
|
||||
elif end == "output":
|
||||
return [queue_item_body["id"]]
|
||||
else:
|
||||
raise ValueError(f"Invalid argument: {end=}") # TODO: use an enum for `end`
|
||||
|
||||
operation = queue_item_body.get("operation", self.default_operation_name)
|
||||
|
||||
file_pattern = self.operation2file_patterns[operation][end]
|
||||
|
||||
file_descriptor = {
|
||||
**project(queue_item_body, ["dossierId", "fileId", "pages"]),
|
||||
"pages": pages(),
|
||||
"extension": file_pattern["extension"],
|
||||
"subdir": file_pattern["subdir"],
|
||||
}
|
||||
|
||||
return file_descriptor
|
||||
|
||||
def build_matcher(self, file_descriptor):
|
||||
def make_filename(file_id, subdir, suffix):
|
||||
return os.path.join(file_id, subdir, suffix) if subdir else f"{file_id}.{suffix}"
|
||||
|
||||
dossier_id, file_id, subdir, pages, extension = itemgetter(
|
||||
"dossierId", "fileId", "subdir", "pages", "extension"
|
||||
)(file_descriptor)
|
||||
|
||||
matcher = os.path.join(
|
||||
dossier_id, make_filename(file_id, subdir, self.__build_page_regex(pages, subdir) + extension)
|
||||
)
|
||||
|
||||
return matcher
|
||||
|
||||
@staticmethod
|
||||
def __build_page_regex(pages, subdir):
|
||||
|
||||
n_pages = len(pages)
|
||||
if n_pages > 1:
|
||||
page_re = "id:(" + "|".join(map(str, pages)) + ")."
|
||||
elif n_pages == 1:
|
||||
page_re = f"id:{pages[0]}."
|
||||
else: # no pages specified -> either all pages or no pages, depending on whether a subdir is specified
|
||||
page_re = r"id:\d+." if subdir else ""
|
||||
|
||||
return page_re
|
||||
|
||||
@staticmethod
|
||||
def build_storage_upload_info(analysis_payload, request_metadata):
|
||||
storage_upload_info = {**request_metadata, "id": analysis_payload["metadata"].get("id", 0)}
|
||||
return storage_upload_info
|
||||
|
||||
def get_path_prefix(self, queue_item_body):
|
||||
prefix = "/".join(itemgetter("dossierId", "fileId")(self.build_file_descriptor(queue_item_body, end="input")))
|
||||
return prefix
|
||||
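
To make the descriptor/matcher flow concrete, a small sketch of how a queue message becomes a storage object matcher (the operation pattern table and all identifiers are made up; the exact matcher string depends on the configured patterns):

from pyinfra.file_descriptor_builder import RedFileDescriptorBuilder

# hypothetical pattern table: page images live in a subdir, results in a flat .json.gz
operation2file_patterns = {
    "classification": {
        "input": {"subdir": "images", "extension": ".png", "multi": True},
        "output": {"subdir": "", "extension": ".images.json.gz"},
    }
}

builder = RedFileDescriptorBuilder(operation2file_patterns, default_operation_name="classification")

message = {"dossierId": "dossier-1", "fileId": "file-1", "pages": [3, 4], "operation": "classification"}

descriptor = builder.build_file_descriptor(message, end="input")
matcher = builder.build_matcher(descriptor)
# matcher is a path-like regex such as "dossier-1/file-1/images/id:(3|4)..png"
print(matcher)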
@ -1,63 +0,0 @@
|
||||
from pyinfra.file_descriptor_builder import FileDescriptorBuilder
|
||||
|
||||
|
||||
class FileDescriptorManager:
|
||||
"""Decorates a file descriptor builder with additional convenience functionality and this way provides a
|
||||
comprehensive interface for all file descriptor related operations, while the concrete descriptor logic is
|
||||
implemented in a file descriptor builder.
|
||||
|
||||
TODO: This is supposed to be fully decoupled from the concrete file descriptor builder implementation, however some
|
||||
bad coupling is still left.
|
||||
"""
|
||||
|
||||
def __init__(self, bucket_name, file_descriptor_builder: FileDescriptorBuilder):
|
||||
self.bucket_name = bucket_name
|
||||
self.operation_file_descriptor_builder = file_descriptor_builder
|
||||
|
||||
def get_input_object_name(self, queue_item_body: dict):
|
||||
return self.get_object_name(queue_item_body, end="input")
|
||||
|
||||
def get_output_object_name(self, queue_item_body: dict):
|
||||
return self.get_object_name(queue_item_body, end="output")
|
||||
|
||||
def get_object_name(self, queue_item_body: dict, end):
|
||||
file_descriptor = self.build_file_descriptor(queue_item_body, end=end)
|
||||
object_name = self.__build_matcher(file_descriptor)
|
||||
|
||||
return object_name
|
||||
|
||||
def build_file_descriptor(self, queue_item_body, end="input"):
|
||||
return self.operation_file_descriptor_builder.build_file_descriptor(queue_item_body, end=end)
|
||||
|
||||
def build_input_matcher(self, queue_item_body):
|
||||
return self.build_matcher(queue_item_body, end="input")
|
||||
|
||||
def build_output_matcher(self, queue_item_body):
|
||||
return self.build_matcher(queue_item_body, end="output")
|
||||
|
||||
def build_matcher(self, queue_item_body, end):
|
||||
file_descriptor = self.build_file_descriptor(queue_item_body, end=end)
|
||||
return self.__build_matcher(file_descriptor)
|
||||
|
||||
def __build_matcher(self, file_descriptor):
|
||||
return self.operation_file_descriptor_builder.build_matcher(file_descriptor)
|
||||
|
||||
def get_input_object_descriptor(self, queue_item_body):
|
||||
return self.get_object_descriptor(queue_item_body, end="input")
|
||||
|
||||
def get_output_object_descriptor(self, storage_upload_info):
|
||||
return self.get_object_descriptor(storage_upload_info, end="output")
|
||||
|
||||
def get_object_descriptor(self, queue_item_body, end):
|
||||
# TODO: this is complected with the Storage class API
|
||||
# FIXME: bad coupling
|
||||
return {
|
||||
"bucket_name": self.bucket_name,
|
||||
"object_name": self.get_object_name(queue_item_body, end=end),
|
||||
}
|
||||
|
||||
def build_storage_upload_info(self, analysis_payload, request_metadata):
|
||||
return self.operation_file_descriptor_builder.build_storage_upload_info(analysis_payload, request_metadata)
|
||||
|
||||
def get_path_prefix(self, queue_item_body):
|
||||
return self.operation_file_descriptor_builder.get_path_prefix(queue_item_body)
|
||||
@ -1,63 +0,0 @@
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from flask import Flask, jsonify
|
||||
from waitress import serve
|
||||
|
||||
from pyinfra.config import CONFIG
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
def run_probing_webserver(app, host=None, port=None, mode=None):
|
||||
if not host:
|
||||
host = CONFIG.probing_webserver.host
|
||||
|
||||
if not port:
|
||||
port = CONFIG.probing_webserver.port
|
||||
|
||||
if not mode:
|
||||
mode = CONFIG.probing_webserver.mode
|
||||
|
||||
if mode == "development":
|
||||
app.run(host=host, port=port, debug=True)
|
||||
|
||||
elif mode == "production":
|
||||
serve(app, host=host, port=port)
|
||||
|
||||
|
||||
def set_up_probing_webserver():
|
||||
# TODO: implement meaningful checks
|
||||
app = Flask(__name__)
|
||||
informed_about_missing_prometheus_endpoint = False
|
||||
|
||||
@app.route("/ready", methods=["GET"])
|
||||
def ready():
|
||||
resp = jsonify("OK")
|
||||
resp.status_code = 200
|
||||
return resp
|
||||
|
||||
@app.route("/health", methods=["GET"])
|
||||
def healthy():
|
||||
resp = jsonify("OK")
|
||||
resp.status_code = 200
|
||||
return resp
|
||||
|
||||
@app.route("/prometheus", methods=["GET"])
|
||||
def get_metrics_from_analysis_endpoint():
|
||||
nonlocal informed_about_missing_prometheus_endpoint
|
||||
try:
|
||||
resp = requests.get(f"{CONFIG.rabbitmq.callback.analysis_endpoint}/prometheus")
|
||||
resp.raise_for_status()
|
||||
except ConnectionError:
|
||||
return ""
|
||||
except requests.exceptions.HTTPError as err:
|
||||
if resp.status_code == 404:
|
||||
if not informed_about_missing_prometheus_endpoint:
|
||||
logger.warning(f"Got no metrics from analysis prometheus endpoint: {err}")
|
||||
informed_about_missing_prometheus_endpoint = True
|
||||
else:
|
||||
logging.warning(f"Caught {err}")
|
||||
return resp.text
|
||||
|
||||
return app
|
||||
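
Using the probing webserver was a two-liner (a sketch; the module path is an assumption, and host/port/mode fall back to the CONFIG values when omitted):

from pyinfra.probing_webserver import run_probing_webserver, set_up_probing_webserver  # module path assumed

app = set_up_probing_webserver()               # registers /ready, /health and /prometheus routes
run_probing_webserver(app, mode="production")  # serves via waitress; "development" uses Flask's debug server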
@ -1,18 +0,0 @@
|
||||
"""Defines constant paths relative to the module root path."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
MODULE_DIR = Path(__file__).resolve().parents[0]
|
||||
|
||||
PACKAGE_ROOT_DIR = MODULE_DIR.parents[0]
|
||||
|
||||
TEST_DIR = PACKAGE_ROOT_DIR / "test"
|
||||
|
||||
CONFIG_FILE = PACKAGE_ROOT_DIR / "config.yaml"
|
||||
|
||||
TEST_CONFIG_FILE = TEST_DIR / "config.yaml"
|
||||
|
||||
COMPOSE_PATH = PACKAGE_ROOT_DIR
|
||||
|
||||
BANNER_FILE = PACKAGE_ROOT_DIR / "banner.txt"
|
||||
@ -1,14 +0,0 @@
|
||||
import abc
|
||||
|
||||
|
||||
class ParsingError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class BlobParser(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
def parse(self, blob: bytes):
|
||||
pass
|
||||
|
||||
def __call__(self, blob: bytes):
|
||||
return self.parse(blob)
|
||||
@ -1,67 +0,0 @@
|
||||
import logging
|
||||
|
||||
from funcy import rcompose
|
||||
|
||||
from pyinfra.parser.blob_parser import ParsingError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Either:
|
||||
def __init__(self, item):
|
||||
self.item = item
|
||||
|
||||
def bind(self):
|
||||
return self.item
|
||||
|
||||
|
||||
class Left(Either):
|
||||
pass
|
||||
|
||||
|
||||
class Right(Either):
|
||||
pass
|
||||
|
||||
|
||||
class EitherParserWrapper:
|
||||
def __init__(self, parser):
|
||||
self.parser = parser
|
||||
|
||||
def __log(self, result):
|
||||
if isinstance(result, Right):
|
||||
logger.log(logging.DEBUG - 5, f"{self.parser.__class__.__name__} succeeded or forwarded on {result.bind()}")
|
||||
else:
|
||||
logger.log(logging.DEBUG - 5, f"{self.parser.__class__.__name__} failed on {result.bind()}")
|
||||
return result
|
||||
|
||||
def parse(self, item: Either):
|
||||
if isinstance(item, Left):
|
||||
|
||||
try:
|
||||
return Right(self.parser(item.bind()))
|
||||
except ParsingError:
|
||||
return item
|
||||
|
||||
elif isinstance(item, Right):
|
||||
return item
|
||||
|
||||
else:
|
||||
return self.parse(Left(item))
|
||||
|
||||
def __call__(self, item):
|
||||
return self.__log(self.parse(item))
|
||||
|
||||
|
||||
class EitherParserComposer:
|
||||
def __init__(self, *parsers):
|
||||
self.parser = rcompose(*map(EitherParserWrapper, parsers))
|
||||
|
||||
def parse(self, item):
|
||||
result = self.parser(item)
|
||||
if isinstance(result, Right):
|
||||
return result.bind()
|
||||
else:
|
||||
raise ParsingError("All parsers failed.")
|
||||
|
||||
def __call__(self, item):
|
||||
return self.parse(item)
|
||||
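
The Left/Right wrappers implement a small railway pattern: each wrapped parser only runs while the value is still a Left, and the first success short-circuits the rest. A usage sketch composing the blob parsers defined further down (the module paths are assumptions):

from pyinfra.parser.either_parser import EitherParserComposer   # module path assumed
from pyinfra.parser.json_blob_parser import JsonBlobParser      # module path assumed
from pyinfra.parser.string_blob_parser import StringBlobParser  # module path assumed

# try the JSON envelope first, fall back to a plain UTF-8 string
parse = EitherParserComposer(JsonBlobParser(), StringBlobParser())

print(parse(b'{"data": "", "id": 1}'))  # JSON parser succeeds, returns the dict with decoded "data" bytes
print(parse(b"just text"))              # JSON parsing fails, StringBlobParser returns "just text"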
@ -1,7 +0,0 @@
|
||||
from pyinfra.parser.blob_parser import BlobParser
|
||||
|
||||
|
||||
class IdentityBlobParser(BlobParser):
|
||||
|
||||
def parse(self, data: bytes):
|
||||
return data
|
||||
@ -1,21 +0,0 @@
|
||||
import json
|
||||
|
||||
from pyinfra.parser.blob_parser import BlobParser, ParsingError
|
||||
from pyinfra.server.packing import string_to_bytes
|
||||
|
||||
|
||||
class JsonBlobParser(BlobParser):
|
||||
|
||||
def parse(self, data: bytes):
|
||||
try:
|
||||
data = data.decode()
|
||||
data = json.loads(data)
|
||||
except (UnicodeDecodeError, json.JSONDecodeError, AttributeError) as err:
|
||||
raise ParsingError from err
|
||||
|
||||
try:
|
||||
data["data"] = string_to_bytes(data["data"])
|
||||
except (KeyError, TypeError) as err:
|
||||
raise ParsingError from err
|
||||
|
||||
return data
|
||||
@ -1,9 +0,0 @@
|
||||
from pyinfra.parser.blob_parser import BlobParser, ParsingError
|
||||
|
||||
|
||||
class StringBlobParser(BlobParser):
|
||||
def parse(self, data: bytes):
|
||||
try:
|
||||
return data.decode()
|
||||
except Exception as err:
|
||||
raise ParsingError from err
|
||||
@ -1,18 +0,0 @@
|
||||
class CachedPipelineFactory:
|
||||
def __init__(self, base_url, pipeline_factory):
|
||||
self.base_url = base_url
|
||||
self.operation2pipeline = {}
|
||||
self.pipeline_factory = pipeline_factory
|
||||
|
||||
def get_pipeline(self, operation: str):
|
||||
pipeline = self.operation2pipeline.get(operation, None) or self.__register_pipeline(operation)
|
||||
return pipeline
|
||||
|
||||
def __register_pipeline(self, operation):
|
||||
endpoint = self.__make_endpoint(operation)
|
||||
pipeline = self.pipeline_factory(endpoint)
|
||||
self.operation2pipeline[operation] = pipeline
|
||||
return pipeline
|
||||
|
||||
def __make_endpoint(self, operation):
|
||||
return f"{self.base_url}/{operation}"
|
||||
329 pyinfra/queue/async_manager.py (Normal file)
@ -0,0 +1,329 @@
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, Set
|
||||
|
||||
import aiohttp
|
||||
from aio_pika import ExchangeType, IncomingMessage, Message, connect
|
||||
from aio_pika.abc import (
|
||||
AbstractChannel,
|
||||
AbstractConnection,
|
||||
AbstractExchange,
|
||||
AbstractIncomingMessage,
|
||||
AbstractQueue,
|
||||
)
|
||||
from aio_pika.exceptions import (
|
||||
ChannelClosed,
|
||||
ChannelInvalidStateError,
|
||||
ConnectionClosed,
|
||||
)
|
||||
from aiormq.exceptions import AMQPConnectionError
|
||||
from kn_utils.logging import logger
|
||||
from kn_utils.retry import retry
|
||||
|
||||
|
||||
@dataclass
|
||||
class RabbitMQConfig:
|
||||
host: str
|
||||
port: int
|
||||
username: str
|
||||
password: str
|
||||
heartbeat: int
|
||||
input_queue_prefix: str
|
||||
tenant_event_queue_suffix: str
|
||||
tenant_exchange_name: str
|
||||
service_request_exchange_name: str
|
||||
service_response_exchange_name: str
|
||||
service_dead_letter_queue_name: str
|
||||
queue_expiration_time: int
|
||||
pod_name: str
|
||||
|
||||
connection_params: Dict[str, object] = field(init=False)
|
||||
|
||||
def __post_init__(self):
|
||||
self.connection_params = {
|
||||
"host": self.host,
|
||||
"port": self.port,
|
||||
"login": self.username,
|
||||
"password": self.password,
|
||||
"client_properties": {"heartbeat": self.heartbeat},
|
||||
}
|
||||
|
||||
|
||||
class AsyncQueueManager:
|
||||
def __init__(
|
||||
self,
|
||||
config: RabbitMQConfig,
|
||||
tenant_service_url: str,
|
||||
message_processor: Callable[[Dict[str, Any]], Dict[str, Any]],
|
||||
max_concurrent_tasks: int = 10,
|
||||
):
|
||||
self.config = config
|
||||
self.tenant_service_url = tenant_service_url
|
||||
self.message_processor = message_processor
|
||||
self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
|
||||
|
||||
self.connection: AbstractConnection | None = None
|
||||
self.channel: AbstractChannel | None = None
|
||||
self.tenant_exchange: AbstractExchange | None = None
|
||||
self.input_exchange: AbstractExchange | None = None
|
||||
self.output_exchange: AbstractExchange | None = None
|
||||
self.tenant_exchange_queue: AbstractQueue | None = None
|
||||
self.tenant_queues: Dict[str, AbstractChannel] = {}
|
||||
self.consumer_tags: Dict[str, str] = {}
|
||||
|
||||
self.message_count: int = 0
|
||||
|
||||
@retry(tries=5, exceptions=AMQPConnectionError, reraise=True, logger=logger)
|
||||
async def connect(self) -> None:
|
||||
logger.info("Attempting to connect to RabbitMQ...")
|
||||
self.connection = await connect(**self.config.connection_params)
|
||||
self.connection.close_callbacks.add(self.on_connection_close)
|
||||
self.channel = await self.connection.channel()
|
||||
await self.channel.set_qos(prefetch_count=1)
|
||||
logger.info("Successfully connected to RabbitMQ")
|
||||
|
||||
async def on_connection_close(self, sender, exc):
|
||||
"""This is a callback for unexpected connection closures."""
|
||||
logger.debug(f"Sender: {sender}")
|
||||
if isinstance(exc, ConnectionClosed):
|
||||
logger.warning("Connection to RabbitMQ lost. Attempting to reconnect...")
|
||||
try:
|
||||
active_tenants = await self.fetch_active_tenants()
|
||||
await self.run(active_tenants=active_tenants)
|
||||
logger.debug("Reconnected to RabbitMQ successfully")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to reconnect to RabbitMQ: {e}")
|
||||
# cancel queue manager and webserver to shutdown service
|
||||
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
|
||||
[task.cancel() for task in tasks if task.get_name() in ["queues", "webserver"]]
|
||||
else:
|
||||
logger.debug("Connection closed on purpose.")
|
||||
|
||||
async def is_ready(self) -> bool:
|
||||
if self.connection is None or self.connection.is_closed:
|
||||
try:
|
||||
await self.connect()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to connect to RabbitMQ: {e}")
|
||||
return False
|
||||
return True
|
||||
|
||||
@retry(tries=5, exceptions=(AMQPConnectionError, ChannelInvalidStateError), reraise=True, logger=logger)
|
||||
async def setup_exchanges(self) -> None:
|
||||
self.tenant_exchange = await self.channel.declare_exchange(
|
||||
self.config.tenant_exchange_name, ExchangeType.TOPIC, durable=True
|
||||
)
|
||||
self.input_exchange = await self.channel.declare_exchange(
|
||||
self.config.service_request_exchange_name, ExchangeType.DIRECT, durable=True
|
||||
)
|
||||
self.output_exchange = await self.channel.declare_exchange(
|
||||
self.config.service_response_exchange_name, ExchangeType.DIRECT, durable=True
|
||||
)
|
||||
|
||||
# we must declare DLQ to handle error messages
|
||||
self.dead_letter_queue = await self.channel.declare_queue(
|
||||
self.config.service_dead_letter_queue_name, durable=True
|
||||
)
|
||||
|
||||
@retry(tries=5, exceptions=(AMQPConnectionError, ChannelInvalidStateError), reraise=True, logger=logger)
|
||||
async def setup_tenant_queue(self) -> None:
|
||||
self.tenant_exchange_queue = await self.channel.declare_queue(
|
||||
f"{self.config.pod_name}_{self.config.tenant_event_queue_suffix}",
|
||||
durable=True,
|
||||
arguments={
|
||||
"x-dead-letter-exchange": "",
|
||||
"x-dead-letter-routing-key": self.config.service_dead_letter_queue_name,
|
||||
"x-expires": self.config.queue_expiration_time,
|
||||
},
|
||||
)
|
||||
await self.tenant_exchange_queue.bind(self.tenant_exchange, routing_key="tenant.*")
|
||||
self.consumer_tags["tenant_exchange_queue"] = await self.tenant_exchange_queue.consume(
|
||||
self.process_tenant_message
|
||||
)
|
||||
|
||||
async def process_tenant_message(self, message: AbstractIncomingMessage) -> None:
|
||||
try:
|
||||
async with message.process():
|
||||
message_body = json.loads(message.body.decode())
|
||||
logger.debug(f"Tenant message received: {message_body}")
|
||||
tenant_id = message_body["tenantId"]
|
||||
routing_key = message.routing_key
|
||||
|
||||
if routing_key == "tenant.created":
|
||||
await self.create_tenant_queues(tenant_id)
|
||||
elif routing_key == "tenant.delete":
|
||||
await self.delete_tenant_queues(tenant_id)
|
||||
except Exception as e:
|
||||
logger.error(e, exc_info=True)
|
||||
|
||||
async def create_tenant_queues(self, tenant_id: str) -> None:
|
||||
queue_name = f"{self.config.input_queue_prefix}_{tenant_id}"
|
||||
logger.info(f"Declaring queue: {queue_name}")
|
||||
try:
|
||||
input_queue = await self.channel.declare_queue(
|
||||
queue_name,
|
||||
durable=True,
|
||||
arguments={
|
||||
"x-dead-letter-exchange": "",
|
||||
"x-dead-letter-routing-key": self.config.service_dead_letter_queue_name,
|
||||
},
|
||||
)
|
||||
await input_queue.bind(self.input_exchange, routing_key=tenant_id)
|
||||
self.consumer_tags[tenant_id] = await input_queue.consume(self.process_input_message)
|
||||
self.tenant_queues[tenant_id] = input_queue
|
||||
logger.info(f"Created and started consuming queue for tenant {tenant_id}")
|
||||
except Exception as e:
|
||||
logger.error(e, exc_info=True)
|
||||
|
||||
async def delete_tenant_queues(self, tenant_id: str) -> None:
|
||||
if tenant_id in self.tenant_queues:
|
||||
# somehow queue.delete() does not work here
|
||||
await self.channel.queue_delete(f"{self.config.input_queue_prefix}_{tenant_id}")
|
||||
del self.tenant_queues[tenant_id]
|
||||
del self.consumer_tags[tenant_id]
|
||||
logger.info(f"Deleted queues for tenant {tenant_id}")
|
||||
|
||||
async def process_input_message(self, message: IncomingMessage) -> None:
|
||||
async def process_message_body_and_await_result(unpacked_message_body):
|
||||
async with self.semaphore:
|
||||
loop = asyncio.get_running_loop()
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as thread_pool_executor:
|
||||
logger.info("Processing payload in a separate thread.")
|
||||
result = await loop.run_in_executor(
|
||||
thread_pool_executor, self.message_processor, unpacked_message_body
|
||||
)
|
||||
return result
|
||||
|
||||
async with message.process(ignore_processed=True):
|
||||
if message.redelivered:
|
||||
logger.warning(f"Declining message with {message.delivery_tag=} due to it being redelivered.")
|
||||
await message.nack(requeue=False)
|
||||
return
|
||||
|
||||
if message.body.decode("utf-8") == "STOP":
|
||||
logger.info("Received stop signal, stopping consumption...")
|
||||
await message.ack()
|
||||
# TODO: shutdown is probably not the right call here - align w/ Dev what should happen on stop signal
|
||||
await self.shutdown()
|
||||
return
|
||||
|
||||
self.message_count += 1
|
||||
|
||||
try:
|
||||
tenant_id = message.routing_key
|
||||
|
||||
filtered_message_headers = (
|
||||
{k: v for k, v in message.headers.items() if k.lower().startswith("x-")} if message.headers else {}
|
||||
)
|
||||
|
||||
logger.debug(f"Processing message with {filtered_message_headers=}.")
|
||||
|
||||
result: dict = await (
|
||||
process_message_body_and_await_result({**json.loads(message.body), **filtered_message_headers})
|
||||
or {}
|
||||
)
|
||||
|
||||
if result:
|
||||
await self.publish_to_output_exchange(tenant_id, result, filtered_message_headers)
|
||||
await message.ack()
|
||||
logger.debug(f"Message with {message.delivery_tag=} acknowledged.")
|
||||
else:
|
||||
raise ValueError(f"Could not process message with {message.body=}.")
|
||||
|
||||
except json.JSONDecodeError:
|
||||
await message.nack(requeue=False)
|
||||
logger.error(f"Invalid JSON in input message: {message.body}", exc_info=True)
|
||||
except FileNotFoundError as e:
|
||||
logger.warning(f"{e}, declining message with {message.delivery_tag=}.", exc_info=True)
|
||||
await message.nack(requeue=False)
|
||||
except Exception as e:
|
||||
await message.nack(requeue=False)
|
||||
logger.error(f"Error processing input message: {e}", exc_info=True)
|
||||
finally:
|
||||
self.message_count -= 1
|
||||
|
||||
async def publish_to_output_exchange(self, tenant_id: str, result: Dict[str, Any], headers: Dict[str, Any]) -> None:
|
||||
await self.output_exchange.publish(
|
||||
Message(body=json.dumps(result).encode(), headers=headers),
|
||||
routing_key=tenant_id,
|
||||
)
|
||||
logger.info(f"Published result to queue {tenant_id}.")
|
||||
|
||||
@retry(tries=5, exceptions=(aiohttp.ClientResponseError, aiohttp.ClientConnectorError), reraise=True, logger=logger)
|
||||
async def fetch_active_tenants(self) -> Set[str]:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(self.tenant_service_url) as response:
|
||||
response.raise_for_status()
|
||||
if response.headers["content-type"].lower() == "application/json":
|
||||
data = await response.json()
|
||||
return {tenant["tenantId"] for tenant in data}
|
||||
else:
|
||||
logger.error(
|
||||
f"Failed to fetch active tenants. Content type is not JSON: {response.headers['content-type'].lower()}"
|
||||
)
|
||||
return set()
|
||||
|
||||
@retry(
|
||||
tries=5,
|
||||
exceptions=(
|
||||
AMQPConnectionError,
|
||||
ChannelInvalidStateError,
|
||||
),
|
||||
reraise=True,
|
||||
logger=logger,
|
||||
)
|
||||
async def initialize_tenant_queues(self, active_tenants: set) -> None:
|
||||
for tenant_id in active_tenants:
|
||||
await self.create_tenant_queues(tenant_id)
|
||||
|
||||
async def run(self, active_tenants: set) -> None:
|
||||
|
||||
await self.connect()
|
||||
await self.setup_exchanges()
|
||||
await self.initialize_tenant_queues(active_tenants=active_tenants)
|
||||
await self.setup_tenant_queue()
|
||||
|
||||
logger.info("RabbitMQ handler is running. Press CTRL+C to exit.")
|
||||
|
||||
async def close_channels(self) -> None:
|
||||
try:
|
||||
if self.channel and not self.channel.is_closed:
|
||||
# Cancel queues to stop fetching messages
|
||||
logger.debug("Cancelling queues...")
|
||||
for tenant, queue in self.tenant_queues.items():
|
||||
await queue.cancel(self.consumer_tags[tenant])
|
||||
if self.tenant_exchange_queue:
|
||||
await self.tenant_exchange_queue.cancel(self.consumer_tags["tenant_exchange_queue"])
|
||||
while self.message_count != 0:
|
||||
logger.debug(f"Messages are still being processed: {self.message_count=} ")
|
||||
await asyncio.sleep(2)
|
||||
await self.channel.close(exc=asyncio.CancelledError)
|
||||
logger.debug("Channel closed.")
|
||||
else:
|
||||
logger.debug("No channel to close.")
|
||||
except ChannelClosed:
|
||||
logger.warning("Channel was already closed.")
|
||||
except ConnectionClosed:
|
||||
logger.warning("Connection was lost, unable to close channel.")
|
||||
except Exception as e:
|
||||
logger.error(f"Error during channel shutdown: {e}")
|
||||
|
||||
async def close_connection(self) -> None:
|
||||
try:
|
||||
if self.connection and not self.connection.is_closed:
|
||||
await self.connection.close(exc=asyncio.CancelledError)
|
||||
logger.debug("Connection closed.")
|
||||
else:
|
||||
logger.debug("No connection to close.")
|
||||
except ConnectionClosed:
|
||||
logger.warning("Connection was already closed.")
|
||||
except Exception as e:
|
||||
logger.error(f"Error closing connection: {e}")
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
logger.info("Shutting down RabbitMQ handler...")
|
||||
await self.close_channels()
|
||||
await self.close_connection()
|
||||
logger.info("RabbitMQ handler shut down successfully.")
|
||||
42 pyinfra/queue/callback.py (Normal file)
@ -0,0 +1,42 @@
|
||||
from typing import Callable
|
||||
|
||||
from dynaconf import Dynaconf
|
||||
from kn_utils.logging import logger
|
||||
|
||||
from pyinfra.storage.connection import get_storage
|
||||
from pyinfra.storage.utils import (
|
||||
download_data_bytes_as_specified_in_message,
|
||||
upload_data_as_specified_in_message,
|
||||
DownloadedData,
|
||||
)
|
||||
|
||||
DataProcessor = Callable[[dict[str, DownloadedData] | DownloadedData, dict], dict | list | str]
|
||||
Callback = Callable[[dict], dict]
|
||||
|
||||
|
||||
def make_download_process_upload_callback(data_processor: DataProcessor, settings: Dynaconf) -> Callback:
|
||||
"""Default callback for processing queue messages.
|
||||
|
||||
Data will be downloaded from the storage as specified in the message. If a tenant id is specified, the storage
|
||||
will be configured to use that tenant id, otherwise the storage is configured as specified in the settings.
|
||||
The data is then passed to the data_processor, together with the message. The data_processor should return a
|
||||
json serializable object. This object is then uploaded to the storage as specified in the message. The response
|
||||
message is just the original message.
|
||||
"""
|
||||
|
||||
def inner(queue_message_payload: dict) -> dict:
|
||||
logger.info(f"Processing payload with download-process-upload callback...")
|
||||
|
||||
storage = get_storage(settings, queue_message_payload.get("X-TENANT-ID"))
|
||||
|
||||
data: dict[str, DownloadedData] | DownloadedData = download_data_bytes_as_specified_in_message(
|
||||
storage, queue_message_payload
|
||||
)
|
||||
|
||||
result = data_processor(data, queue_message_payload)
|
||||
|
||||
upload_data_as_specified_in_message(storage, queue_message_payload, result)
|
||||
|
||||
return queue_message_payload
|
||||
|
||||
return inner
|
||||
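
The DataProcessor contract above is easiest to read from a small example: it receives the downloaded data (a single object or a dict of objects, keyed per name) plus the raw queue message, and must return something JSON-serializable. A minimal sketch, assuming the downloaded objects are byte strings and the message fields are hypothetical:

from pyinfra.queue.callback import DataProcessor, make_download_process_upload_callback


def count_bytes(data, message) -> dict:
    # `data` is either one downloaded object or a dict of {object_name: downloaded object}
    blobs = data.values() if isinstance(data, dict) else [data]
    return {"fileId": message.get("fileId"), "totalBytes": sum(len(b) for b in blobs)}


processor: DataProcessor = count_bytes
# callback = make_download_process_upload_callback(processor, settings)  # settings: a validated Dynaconf object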
@ -1,16 +0,0 @@
|
||||
from pyinfra.queue.queue_manager.queue_manager import QueueManager
|
||||
|
||||
|
||||
class Consumer:
|
||||
def __init__(self, visitor, queue_manager: QueueManager):
|
||||
self.queue_manager = queue_manager
|
||||
self.visitor = visitor
|
||||
|
||||
def consume_and_publish(self, n=None):
|
||||
self.queue_manager.consume_and_publish(self.visitor, n=n)
|
||||
|
||||
def basic_consume_and_publish(self):
|
||||
self.queue_manager.basic_consume_and_publish(self.visitor)
|
||||
|
||||
def consume(self, **kwargs):
|
||||
return self.queue_manager.consume(**kwargs)
|
||||
229 pyinfra/queue/manager.py (Normal file)
@ -0,0 +1,229 @@
|
||||
import atexit
|
||||
import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
from typing import Callable, Union
|
||||
|
||||
import pika
|
||||
import pika.exceptions
|
||||
from dynaconf import Dynaconf
|
||||
from kn_utils.logging import logger
|
||||
from kn_utils.retry import retry
|
||||
from pika.adapters.blocking_connection import BlockingChannel, BlockingConnection
|
||||
|
||||
from pyinfra.config.loader import validate_settings
|
||||
from pyinfra.config.validators import queue_manager_validators
|
||||
|
||||
pika_logger = logging.getLogger("pika")
|
||||
pika_logger.setLevel(logging.WARNING) # disables non-informative pika log clutter
|
||||
|
||||
MessageProcessor = Callable[[dict], dict]
|
||||
|
||||
|
||||
class QueueManager:
|
||||
def __init__(self, settings: Dynaconf):
|
||||
validate_settings(settings, queue_manager_validators)
|
||||
|
||||
self.input_queue = settings.rabbitmq.input_queue
|
||||
self.output_queue = settings.rabbitmq.output_queue
|
||||
self.dead_letter_queue = settings.rabbitmq.dead_letter_queue
|
||||
|
||||
self.connection_parameters = self.create_connection_parameters(settings)
|
||||
|
||||
self.connection: Union[BlockingConnection, None] = None
|
||||
self.channel: Union[BlockingChannel, None] = None
|
||||
self.connection_sleep = settings.rabbitmq.connection_sleep
|
||||
self.processing_callback = False
|
||||
self.received_signal = False
|
||||
|
||||
atexit.register(self.stop_consuming)
|
||||
signal.signal(signal.SIGTERM, self._handle_stop_signal)
|
||||
signal.signal(signal.SIGINT, self._handle_stop_signal)
|
||||
|
||||
self.max_retries = settings.rabbitmq.max_retries or 5
|
||||
self.max_delay = settings.rabbitmq.max_delay or 60
|
||||
|
||||
@staticmethod
|
||||
def create_connection_parameters(settings: Dynaconf):
|
||||
credentials = pika.PlainCredentials(username=settings.rabbitmq.username, password=settings.rabbitmq.password)
|
||||
pika_connection_params = {
|
||||
"host": settings.rabbitmq.host,
|
||||
"port": settings.rabbitmq.port,
|
||||
"credentials": credentials,
|
||||
"heartbeat": settings.rabbitmq.heartbeat,
|
||||
}
|
||||
|
||||
return pika.ConnectionParameters(**pika_connection_params)
|
||||
|
||||
@retry(
|
||||
tries=5,
|
||||
exceptions=(pika.exceptions.AMQPConnectionError, pika.exceptions.ChannelClosedByBroker),
|
||||
reraise=True,
|
||||
)
|
||||
def establish_connection(self):
|
||||
if self.connection and self.connection.is_open:
|
||||
logger.debug("Connection to RabbitMQ already established.")
|
||||
return
|
||||
|
||||
logger.info("Establishing connection to RabbitMQ...")
|
||||
self.connection = pika.BlockingConnection(parameters=self.connection_parameters)
|
||||
|
||||
logger.debug("Opening channel...")
|
||||
self.channel = self.connection.channel()
|
||||
self.channel.basic_qos(prefetch_count=1)
|
||||
|
||||
args = {
|
||||
"x-dead-letter-exchange": "",
|
||||
"x-dead-letter-routing-key": self.dead_letter_queue,
|
||||
}
|
||||
|
||||
self.channel.queue_declare(self.input_queue, arguments=args, auto_delete=False, durable=True)
|
||||
self.channel.queue_declare(self.output_queue, arguments=args, auto_delete=False, durable=True)
|
||||
|
||||
logger.info("Connection to RabbitMQ established, channel open.")
|
||||
|
||||
def is_ready(self):
|
||||
try:
|
||||
self.establish_connection()
|
||||
return self.channel.is_open
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to establish connection: {e}")
|
||||
return False
|
||||
|
||||
@retry(
|
||||
tries=5,
|
||||
exceptions=pika.exceptions.AMQPConnectionError,
|
||||
reraise=True,
|
||||
)
|
||||
def start_consuming(self, message_processor: Callable):
|
||||
on_message_callback = self._make_on_message_callback(message_processor)
|
||||
|
||||
try:
|
||||
self.establish_connection()
|
||||
self.channel.basic_consume(self.input_queue, on_message_callback)
|
||||
logger.info("Starting to consume messages...")
|
||||
self.channel.start_consuming()
|
||||
except pika.exceptions.AMQPConnectionError as e:
|
||||
logger.error(f"AMQP Connection Error: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"An unexpected error occurred while consuming messages: {e}", exc_info=True)
|
||||
raise
|
||||
finally:
|
||||
self.stop_consuming()
|
||||
|
||||
def stop_consuming(self):
|
||||
if self.channel and self.channel.is_open:
|
||||
logger.info("Stopping consuming...")
|
||||
self.channel.stop_consuming()
|
||||
logger.info("Closing channel...")
|
||||
self.channel.close()
|
||||
|
||||
if self.connection and self.connection.is_open:
|
||||
logger.info("Closing connection to RabbitMQ...")
|
||||
self.connection.close()
|
||||
|
||||
def publish_message_to_input_queue(self, message: Union[str, bytes, dict], properties: pika.BasicProperties = None):
|
||||
if isinstance(message, str):
|
||||
message = message.encode("utf-8")
|
||||
elif isinstance(message, dict):
|
||||
message = json.dumps(message).encode("utf-8")
|
||||
|
||||
self.establish_connection()
|
||||
self.channel.basic_publish(
|
||||
"",
|
||||
self.input_queue,
|
||||
properties=properties,
|
||||
body=message,
|
||||
)
|
||||
logger.info(f"Published message to queue {self.input_queue}.")
|
||||
|
||||
def purge_queues(self):
|
||||
self.establish_connection()
|
||||
try:
|
||||
self.channel.queue_purge(self.input_queue)
|
||||
self.channel.queue_purge(self.output_queue)
|
||||
logger.info("Queues purged.")
|
||||
except pika.exceptions.ChannelWrongStateError:
|
||||
pass
|
||||
|
||||
def get_message_from_output_queue(self):
|
||||
self.establish_connection()
|
||||
return self.channel.basic_get(self.output_queue, auto_ack=True)
|
||||
|
||||
def _make_on_message_callback(self, message_processor: MessageProcessor):
|
||||
def process_message_body_and_await_result(unpacked_message_body):
|
||||
# Processing the message in a separate thread is necessary for the main thread pika client to be able to
|
||||
# process data events (e.g. heartbeats) while the message is being processed.
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as thread_pool_executor:
|
||||
logger.info("Processing payload in separate thread.")
|
||||
future = thread_pool_executor.submit(message_processor, unpacked_message_body)
|
||||
|
||||
# TODO: This block is probably not necessary, but kept since the implications of removing it are
|
||||
# unclear. Remove it in a future iteration where fewer changes are being made to the code base.
|
||||
while future.running():
|
||||
logger.debug("Waiting for payload processing to finish...")
|
||||
self.connection.sleep(self.connection_sleep)
|
||||
|
||||
return future.result()
|
||||
|
||||
def on_message_callback(channel, method, properties, body):
|
||||
logger.info(f"Received message from queue with delivery_tag {method.delivery_tag}.")
|
||||
self.processing_callback = True
|
||||
|
||||
if method.redelivered:
|
||||
logger.warning(f"Declining message with {method.delivery_tag=} due to it being redelivered.")
|
||||
channel.basic_nack(method.delivery_tag, requeue=False)
|
||||
return
|
||||
|
||||
if body.decode("utf-8") == "STOP":
|
||||
logger.info(f"Received stop signal, stopping consuming...")
|
||||
channel.basic_ack(delivery_tag=method.delivery_tag)
|
||||
self.stop_consuming()
|
||||
return
|
||||
|
||||
try:
|
||||
filtered_message_headers = (
|
||||
{k: v for k, v in properties.headers.items() if k.lower().startswith("x-")}
|
||||
if properties.headers
|
||||
else {}
|
||||
)
|
||||
logger.debug(f"Processing message with {filtered_message_headers=}.")
|
||||
result: dict = (
|
||||
process_message_body_and_await_result({**json.loads(body), **filtered_message_headers}) or {}
|
||||
)
|
||||
|
||||
channel.basic_publish(
|
||||
"",
|
||||
self.output_queue,
|
||||
json.dumps(result).encode(),
|
||||
properties=pika.BasicProperties(headers=filtered_message_headers),
|
||||
)
|
||||
logger.info(f"Published result to queue {self.output_queue}.")
|
||||
|
||||
channel.basic_ack(delivery_tag=method.delivery_tag)
|
||||
logger.debug(f"Message with {method.delivery_tag=} acknowledged.")
|
||||
except FileNotFoundError as e:
|
||||
logger.warning(f"{e}, declining message with {method.delivery_tag=}.")
|
||||
channel.basic_nack(method.delivery_tag, requeue=False)
|
||||
except Exception:
|
||||
logger.warning(f"Failed to process message with {method.delivery_tag=}, declining...", exc_info=True)
|
||||
channel.basic_nack(method.delivery_tag, requeue=False)
|
||||
raise
|
||||
|
||||
finally:
|
||||
self.processing_callback = False
|
||||
if self.received_signal:
|
||||
self.stop_consuming()
|
||||
sys.exit(0)
|
||||
|
||||
return on_message_callback
|
||||
|
||||
def _handle_stop_signal(self, signum, *args, **kwargs):
|
||||
logger.info(f"Received signal {signum}, stopping consuming...")
|
||||
self.received_signal = True
|
||||
if not self.processing_callback:
|
||||
self.stop_consuming()
|
||||
sys.exit(0)
|
||||
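A minimal sketch of driving the QueueManager defined above, assuming a Dynaconf settings file that provides the rabbitmq.* keys the constructor reads (host, port, username, password, heartbeat, queue names, connection_sleep, max_retries, max_delay):

from dynaconf import Dynaconf

from pyinfra.queue.manager import QueueManager

def echo_processor(message: dict) -> dict:
    # The returned dict is what the on-message callback publishes to the
    # output queue, with the incoming X-* headers attached as properties.
    return {"echo": message}

settings = Dynaconf(settings_files=["settings.yaml"])  # assumed file name
manager = QueueManager(settings)
manager.publish_message_to_input_queue({"document_id": "1234"})  # payload is illustrative
manager.start_consuming(echo_processor)  # blocks; a "STOP" body or SIGTERM/SIGINT ends it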
@ -1,172 +0,0 @@
|
||||
import json
|
||||
import logging
|
||||
from itertools import islice
|
||||
|
||||
import pika
|
||||
|
||||
from pyinfra.config import CONFIG
|
||||
from pyinfra.exceptions import ProcessingFailure, DataLoadingFailure
|
||||
from pyinfra.queue.queue_manager.queue_manager import QueueHandle, QueueManager
|
||||
from pyinfra.visitor import QueueVisitor
|
||||
|
||||
logger = logging.getLogger("pika")
|
||||
logger.setLevel(logging.WARNING)
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
def monkey_patch_queue_handle(channel, queue) -> QueueHandle:
|
||||
|
||||
empty_message = (None, None, None)
|
||||
|
||||
def is_empty_message(message):
|
||||
return message == empty_message
|
||||
|
||||
queue_handle = QueueHandle()
|
||||
queue_handle.empty = lambda: is_empty_message(channel.basic_get(queue))
|
||||
|
||||
def produce_items():
|
||||
|
||||
while True:
|
||||
message = channel.basic_get(queue)
|
||||
|
||||
if is_empty_message(message):
|
||||
break
|
||||
|
||||
method_frame, properties, body = message
|
||||
channel.basic_ack(method_frame.delivery_tag)
|
||||
yield json.loads(body)
|
||||
|
||||
queue_handle.to_list = lambda: list(produce_items())
|
||||
|
||||
return queue_handle
|
||||
|
||||
|
||||
def get_connection_params():
|
||||
|
||||
credentials = pika.PlainCredentials(username=CONFIG.rabbitmq.user, password=CONFIG.rabbitmq.password)
|
||||
kwargs = {
|
||||
"host": CONFIG.rabbitmq.host,
|
||||
"port": CONFIG.rabbitmq.port,
|
||||
"credentials": credentials,
|
||||
"heartbeat": CONFIG.rabbitmq.heartbeat,
|
||||
}
|
||||
parameters = pika.ConnectionParameters(**kwargs)
|
||||
|
||||
return parameters
|
||||
|
||||
|
||||
def get_n_previous_attempts(props):
|
||||
return 0 if props.headers is None else props.headers.get("x-retry-count", 0)
|
||||
|
||||
|
||||
def attempts_remain(n_attempts, max_attempts):
|
||||
return n_attempts < max_attempts
|
||||
|
||||
|
||||
class PikaQueueManager(QueueManager):
|
||||
def __init__(self, input_queue, output_queue, dead_letter_queue=None, connection_params=None):
|
||||
super().__init__(input_queue, output_queue)
|
||||
|
||||
if not connection_params:
|
||||
connection_params = get_connection_params()
|
||||
|
||||
self.connection = pika.BlockingConnection(parameters=connection_params)
|
||||
self.channel = self.connection.channel()
|
||||
self.channel.basic_qos(prefetch_count=1)
|
||||
|
||||
if not dead_letter_queue:
|
||||
dead_letter_queue = CONFIG.rabbitmq.queues.dead_letter
|
||||
|
||||
args = {"x-dead-letter-exchange": "", "x-dead-letter-routing-key": dead_letter_queue}
|
||||
|
||||
self.channel.queue_declare(input_queue, arguments=args, auto_delete=False, durable=True)
|
||||
self.channel.queue_declare(output_queue, arguments=args, auto_delete=False, durable=True)
|
||||
|
||||
def republish(self, body: bytes, n_current_attempts, frame):
|
||||
self.channel.basic_publish(
|
||||
exchange="",
|
||||
routing_key=self._input_queue,
|
||||
body=body,
|
||||
properties=pika.BasicProperties(headers={"x-retry-count": n_current_attempts}),
|
||||
)
|
||||
self.channel.basic_ack(delivery_tag=frame.delivery_tag)
|
||||
|
||||
def publish_request(self, request):
|
||||
logger.debug(f"Publishing {request}")
|
||||
self.channel.basic_publish("", self._input_queue, json.dumps(request).encode())
|
||||
|
||||
def reject(self, body, frame):
|
||||
logger.error(f"Adding to dead letter queue: {body}")
|
||||
self.channel.basic_reject(delivery_tag=frame.delivery_tag, requeue=False)
|
||||
|
||||
def publish_response(self, message, visitor: QueueVisitor, max_attempts=3):
|
||||
|
||||
logger.debug(f"Processing {message}.")
|
||||
|
||||
frame, properties, body = message
|
||||
|
||||
n_attempts = get_n_previous_attempts(properties) + 1
|
||||
|
||||
try:
|
||||
response_messages = visitor(json.loads(body))
|
||||
|
||||
if isinstance(response_messages, dict):
|
||||
response_messages = [response_messages]
|
||||
|
||||
for response_message in response_messages:
|
||||
response_message = json.dumps(response_message).encode()
|
||||
self.channel.basic_publish("", self._output_queue, response_message)
|
||||
|
||||
self.channel.basic_ack(frame.delivery_tag)
|
||||
except (ProcessingFailure, DataLoadingFailure):
|
||||
|
||||
logger.error(f"Message failed to process {n_attempts}/{max_attempts} times: {body}")
|
||||
|
||||
if attempts_remain(n_attempts, max_attempts):
|
||||
self.republish(body, n_attempts, frame)
|
||||
else:
|
||||
self.reject(body, frame)
|
||||
|
||||
def pull_request(self):
|
||||
return self.channel.basic_get(self._input_queue)
|
||||
|
||||
def consume(self, inactivity_timeout=None, n=None):
|
||||
logger.debug("Consuming")
|
||||
gen = self.channel.consume(self._input_queue, inactivity_timeout=inactivity_timeout)
|
||||
yield from islice(gen, n)
|
||||
|
||||
def consume_and_publish(self, visitor: QueueVisitor, n=None):
|
||||
|
||||
logger.info(f"Consuming input queue.")
|
||||
|
||||
for message in self.consume(n=n):
|
||||
self.publish_response(message, visitor)
|
||||
|
||||
def basic_consume_and_publish(self, visitor: QueueVisitor):
|
||||
|
||||
logger.info(f"Basic consuming input queue.")
|
||||
|
||||
def callback(channel, frame, properties, body):
|
||||
message = (frame, properties, body)
|
||||
return self.publish_response(message, visitor)
|
||||
|
||||
self.channel.basic_consume(self._input_queue, callback)
|
||||
self.channel.start_consuming()
|
||||
|
||||
def clear(self):
|
||||
try:
|
||||
self.channel.queue_purge(self._input_queue)
|
||||
self.channel.queue_purge(self._output_queue)
|
||||
assert self.input_queue.to_list() == []
|
||||
assert self.output_queue.to_list() == []
|
||||
except pika.exceptions.ChannelWrongStateError:
|
||||
pass
|
||||
|
||||
@property
|
||||
def input_queue(self) -> QueueHandle:
|
||||
return monkey_patch_queue_handle(self.channel, self._input_queue)
|
||||
|
||||
@property
|
||||
def output_queue(self) -> QueueHandle:
|
||||
return monkey_patch_queue_handle(self.channel, self._output_queue)
|
||||
@ -1,51 +0,0 @@
|
||||
import abc
|
||||
|
||||
|
||||
class QueueHandle:
|
||||
def empty(self) -> bool:
|
||||
raise NotImplementedError
|
||||
|
||||
def to_list(self) -> list:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class QueueManager(abc.ABC):
|
||||
def __init__(self, input_queue, output_queue):
|
||||
self._input_queue = input_queue
|
||||
self._output_queue = output_queue
|
||||
|
||||
@abc.abstractmethod
|
||||
def publish_request(self, request):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def publish_response(self, response, callback):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def pull_request(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def consume(self, **kwargs):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def clear(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def input_queue(self) -> QueueHandle:
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def output_queue(self) -> QueueHandle:
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def consume_and_publish(self, callback, n=None):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def basic_consume_and_publish(self, callback):
|
||||
raise NotImplementedError
|
||||
@ -1,37 +0,0 @@
|
||||
import logging
|
||||
from collections import deque
|
||||
|
||||
from funcy import repeatedly, identity
|
||||
|
||||
from pyinfra.exceptions import NoBufferCapacity
|
||||
from pyinfra.server.nothing import Nothing
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def bufferize(fn, buffer_size=3, persist_fn=identity, null_value=None):
|
||||
def buffered_fn(item):
|
||||
|
||||
if item is not Nothing:
|
||||
buffer.append(persist_fn(item))
|
||||
|
||||
response_payload = fn(repeatedly(buffer.popleft, n_items_to_pop(buffer, item is Nothing)))
|
||||
|
||||
return response_payload or null_value
|
||||
|
||||
def buffer_full(current_buffer_size):
|
||||
if current_buffer_size > buffer_size:
|
||||
logger.warning(f"Overfull buffer. size: {current_buffer_size}; intended capacity: {buffer_size}")
|
||||
|
||||
return current_buffer_size == buffer_size
|
||||
|
||||
def n_items_to_pop(buffer, final):
|
||||
current_buffer_size = len(buffer)
|
||||
return (final or buffer_full(current_buffer_size)) * current_buffer_size
|
||||
|
||||
if not buffer_size > 0:
|
||||
raise NoBufferCapacity("Buffer size must be greater than zero.")
|
||||
|
||||
buffer = deque()
|
||||
|
||||
return buffered_fn
|
||||
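The buffering logic above is easiest to follow with a concrete run; a small sketch of the removed helper (with buffer_size=3 the wrapped function only fires on a full buffer or on the Nothing flush sentinel):

from pyinfra.server.buffering.bufferize import bufferize
from pyinfra.server.nothing import Nothing

def summed(items):
    items = list(items)
    return sum(items) if items else None  # a falsy result falls back to null_value

batched_sum = bufferize(summed, buffer_size=3, null_value="buffering")

print(batched_sum(1))        # "buffering"  (buffer holds [1])
print(batched_sum(2))        # "buffering"  (buffer holds [1, 2])
print(batched_sum(3))        # 6            (buffer full, summed over [1, 2, 3])
print(batched_sum(4))        # "buffering"  (buffer holds [4])
print(batched_sum(Nothing))  # 4            (Nothing flushes the remainder)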
@ -1,24 +0,0 @@
|
||||
from collections import deque
|
||||
from itertools import takewhile
|
||||
|
||||
from funcy import repeatedly
|
||||
|
||||
from pyinfra.server.nothing import is_not_nothing, Nothing
|
||||
|
||||
|
||||
def stream_queue(queue):
|
||||
yield from takewhile(is_not_nothing, repeatedly(queue.popleft))
|
||||
|
||||
|
||||
class Queue:
|
||||
def __init__(self):
|
||||
self.__queue = deque()
|
||||
|
||||
def append(self, package) -> None:
|
||||
self.__queue.append(package)
|
||||
|
||||
def popleft(self):
|
||||
return self.__queue.popleft() if self.__queue else Nothing
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self.__queue)
|
||||
@ -1,44 +0,0 @@
|
||||
from itertools import chain, takewhile
|
||||
from typing import Iterable
|
||||
|
||||
from funcy import first, repeatedly, mapcat
|
||||
|
||||
from pyinfra.server.buffering.bufferize import bufferize
|
||||
from pyinfra.server.nothing import Nothing, is_not_nothing
|
||||
|
||||
|
||||
class FlatStreamBuffer:
|
||||
"""Wraps a stream buffer and chains its output. Also flushes the stream buffer when applied to an iterable."""
|
||||
|
||||
def __init__(self, fn, buffer_size=3):
|
||||
"""Function `fn` needs to be mappable and return an iterable; ideally `fn` returns a generator."""
|
||||
self.stream_buffer = StreamBuffer(fn, buffer_size=buffer_size)
|
||||
|
||||
def __call__(self, items):
|
||||
items = chain(items, [Nothing])
|
||||
yield from mapcat(self.stream_buffer, items)
|
||||
|
||||
|
||||
class StreamBuffer:
|
||||
"""Puts a streaming function between an input and an output buffer."""
|
||||
|
||||
def __init__(self, fn, buffer_size=3):
|
||||
"""Function `fn` needs to be mappable and return an iterable; ideally `fn` returns a generator."""
|
||||
self.fn = bufferize(fn, buffer_size=buffer_size, null_value=[])
|
||||
self.result_stream = chain([])
|
||||
|
||||
def __call__(self, item) -> Iterable:
|
||||
self.push(item)
|
||||
yield from takewhile(is_not_nothing, repeatedly(self.pop))
|
||||
|
||||
def push(self, item):
|
||||
self.result_stream = chain(self.result_stream, self.compute(item))
|
||||
|
||||
def compute(self, item):
|
||||
try:
|
||||
yield from self.fn(item)
|
||||
except TypeError as err:
|
||||
raise TypeError("Function failed with type-error. Is it mappable?") from err
|
||||
|
||||
def pop(self):
|
||||
return first(chain(self.result_stream, [Nothing]))
|
||||
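The two docstrings above are terse; a small sketch of how the removed FlatStreamBuffer batches a mappable function (items are processed in groups of buffer_size, with a final flush for any leftover items):

from pyinfra.server.buffering.stream import FlatStreamBuffer

def double_batch(items):
    # Receives one batch as an iterable and must itself return an iterable.
    return [2 * item for item in items]

buffered = FlatStreamBuffer(double_batch, buffer_size=2)
print(list(buffered([1, 2, 3])))  # [2, 4, 6] — [1, 2] as one batch, [3] flushed at the end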
@ -1,16 +0,0 @@
|
||||
from funcy import rcompose, flatten
|
||||
|
||||
|
||||
# TODO: remove the dispatcher component from the pipeline; it no longer actually dispatches
|
||||
class ClientPipeline:
|
||||
def __init__(self, packer, dispatcher, receiver, interpreter):
|
||||
self.pipe = rcompose(
|
||||
packer,
|
||||
dispatcher,
|
||||
receiver,
|
||||
interpreter,
|
||||
flatten, # each analysis call returns an iterable. Can be empty, singleton or multi item. Hence, flatten.
|
||||
)
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
yield from self.pipe(*args, **kwargs)
|
||||
@ -1,27 +0,0 @@
|
||||
from itertools import tee
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
def inspect(prefix="inspect", embed=False):
|
||||
"""Can be used to inspect compositions of generator functions by placing inbetween two functions."""
|
||||
|
||||
def inner(x):
|
||||
|
||||
if isinstance(x, Iterable) and not isinstance(x, dict) and not isinstance(x, tuple):
|
||||
x, y = tee(x)
|
||||
y = list(y)
|
||||
else:
|
||||
y = x
|
||||
|
||||
l = f" {len(y)} items" if isinstance(y, list) else ""
|
||||
|
||||
print(f"{prefix}{l}:", y)
|
||||
|
||||
if embed:
|
||||
import IPython
|
||||
|
||||
IPython.embed()
|
||||
|
||||
return x
|
||||
|
||||
return inner
|
||||
@ -1,30 +0,0 @@
|
||||
import abc
|
||||
from typing import Iterable
|
||||
|
||||
from more_itertools import peekable
|
||||
|
||||
from pyinfra.server.nothing import Nothing
|
||||
|
||||
|
||||
def has_next(peekable_iter):
|
||||
return peekable_iter.peek(Nothing) is not Nothing
|
||||
|
||||
|
||||
class Dispatcher:
|
||||
def __call__(self, packages: Iterable[dict]):
|
||||
yield from self.dispatch_methods(packages)
|
||||
|
||||
def dispatch_methods(self, packages):
|
||||
packages = peekable(packages)
|
||||
for package in packages:
|
||||
method = self.patch if has_next(packages) else self.post
|
||||
response = method(package)
|
||||
yield response
|
||||
|
||||
@abc.abstractmethod
|
||||
def patch(self, package):
|
||||
raise NotImplementedError
|
||||
|
||||
@abc.abstractmethod
|
||||
def post(self, package):
|
||||
raise NotImplementedError
|
||||
@ -1,21 +0,0 @@
|
||||
from itertools import takewhile
|
||||
|
||||
from funcy import repeatedly, notnone
|
||||
|
||||
from pyinfra.server.dispatcher.dispatcher import Dispatcher
|
||||
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
|
||||
|
||||
|
||||
class QueuedStreamFunctionDispatcher(Dispatcher):
|
||||
def __init__(self, queued_stream_function: QueuedStreamFunction):
|
||||
self.queued_stream_function = queued_stream_function
|
||||
|
||||
def patch(self, package):
|
||||
self.queued_stream_function.push(package)
|
||||
# TODO: this is wonky and a result of the pipeline components having shifted behaviour through previous
|
||||
# refactorings. The analogous functionality for the rest pipeline is in the interpreter. Correct this
|
||||
# asymmetry!
|
||||
yield from takewhile(notnone, repeatedly(self.queued_stream_function.pop))
|
||||
|
||||
def post(self, package):
|
||||
yield from self.patch(package)
|
||||
@ -1,14 +0,0 @@
|
||||
import requests
|
||||
|
||||
from pyinfra.server.dispatcher.dispatcher import Dispatcher
|
||||
|
||||
|
||||
class RestDispatcher(Dispatcher):
|
||||
def __init__(self, endpoint):
|
||||
self.endpoint = endpoint
|
||||
|
||||
def patch(self, package):
|
||||
return requests.patch(self.endpoint, json=package)
|
||||
|
||||
def post(self, package):
|
||||
return requests.post(self.endpoint, json=package)
|
||||
@ -1,8 +0,0 @@
|
||||
import abc
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
class Interpreter(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
def __call__(self, payloads: Iterable):
|
||||
pass
|
||||
@ -1,8 +0,0 @@
|
||||
from typing import Iterable
|
||||
|
||||
from pyinfra.server.interpreter.interpreter import Interpreter
|
||||
|
||||
|
||||
class IdentityInterpreter(Interpreter):
|
||||
def __call__(self, payloads: Iterable):
|
||||
yield from payloads
|
||||
@ -1,23 +0,0 @@
|
||||
from typing import Iterable
|
||||
|
||||
import requests
|
||||
from funcy import takewhile, repeatedly, mapcat
|
||||
|
||||
from pyinfra.server.interpreter.interpreter import Interpreter
|
||||
|
||||
|
||||
def stream_responses(endpoint):
|
||||
def receive():
|
||||
response = requests.get(endpoint)
|
||||
return response
|
||||
|
||||
def more_is_coming(response):
|
||||
return response.status_code == 206
|
||||
|
||||
response_stream = takewhile(more_is_coming, repeatedly(receive))
|
||||
yield from response_stream
|
||||
|
||||
|
||||
class RestPickupStreamer(Interpreter):
|
||||
def __call__(self, payloads: Iterable):
|
||||
yield from mapcat(stream_responses, payloads)
|
||||
@ -1,39 +0,0 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from funcy import identity
|
||||
from prometheus_client import CollectorRegistry, Summary
|
||||
|
||||
from pyinfra.server.operation_dispatcher import OperationDispatcher
|
||||
|
||||
|
||||
class OperationDispatcherMonitoringDecorator:
|
||||
def __init__(self, operation_dispatcher: OperationDispatcher, naming_policy=identity):
|
||||
self.operation_dispatcher = operation_dispatcher
|
||||
self.operation2metric = {}
|
||||
self.naming_policy = naming_policy
|
||||
|
||||
@property
|
||||
@lru_cache(maxsize=None)
|
||||
def registry(self):
|
||||
return CollectorRegistry(auto_describe=True)
|
||||
|
||||
def make_summary_instance(self, op: str):
|
||||
return Summary(f"{self.naming_policy(op)}_seconds", f"Time spent on {op}.", registry=self.registry)
|
||||
|
||||
def submit(self, operation, request):
|
||||
return self.operation_dispatcher.submit(operation, request)
|
||||
|
||||
def pickup(self, operation):
|
||||
with self.get_monitor(operation):
|
||||
return self.operation_dispatcher.pickup(operation)
|
||||
|
||||
def get_monitor(self, operation):
|
||||
monitor = self.operation2metric.get(operation, None) or self.register_operation(operation)
|
||||
return monitor.time()
|
||||
|
||||
def register_operation(self, operation):
|
||||
summary = self.make_summary_instance(operation)
|
||||
self.operation2metric[operation] = summary
|
||||
return summary
|
||||
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
from itertools import chain
|
||||
from typing import Iterable, Union, Tuple
|
||||
|
||||
from pyinfra.exceptions import UnexpectedItemType
|
||||
|
||||
|
||||
def normalize(itr: Iterable[Union[Tuple, Iterable]]) -> Iterable[Tuple]:
|
||||
return chain.from_iterable(map(normalize_item, normalize_item(itr)))
|
||||
|
||||
|
||||
def normalize_item(itm: Union[Tuple, Iterable]) -> Iterable:
|
||||
if isinstance(itm, tuple):
|
||||
return [itm]
|
||||
elif isinstance(itm, Iterable):
|
||||
return itm
|
||||
else:
|
||||
raise UnexpectedItemType("Encountered an item that could not be normalized to a list.")
|
||||
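For reference, the removed normalize helper flattens a mix of single tuples and iterables of tuples into one stream of tuples:

from pyinfra.server.normalization import normalize

# Illustrative mix of a single package and a nested list of packages.
mixed = [(b"a", {"page": 1}), [(b"b", {"page": 2}), (b"c", {"page": 3})]]
print(list(normalize(mixed)))
# [(b'a', {'page': 1}), (b'b', {'page': 2}), (b'c', {'page': 3})]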
@ -1,6 +0,0 @@
|
||||
class Nothing:
|
||||
pass
|
||||
|
||||
|
||||
def is_not_nothing(x):
|
||||
return x is not Nothing
|
||||
@ -1,33 +0,0 @@
|
||||
from itertools import starmap, tee
|
||||
from typing import Dict
|
||||
|
||||
from funcy import juxt, zipdict, cat
|
||||
|
||||
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
|
||||
from pyinfra.server.stream.rest import LazyRestProcessor
|
||||
|
||||
|
||||
class OperationDispatcher:
|
||||
def __init__(self, operation2function: Dict[str, QueuedStreamFunction]):
|
||||
submit_suffixes, pickup_suffixes = zip(*map(juxt(submit_suffix, pickup_suffix), operation2function))
|
||||
processors = starmap(LazyRestProcessor, zip(operation2function.values(), submit_suffixes, pickup_suffixes))
|
||||
self.operation2processor = zipdict(submit_suffixes + pickup_suffixes, cat(tee(processors)))
|
||||
|
||||
@classmethod
|
||||
@property
|
||||
def pickup_suffix(cls):
|
||||
return pickup_suffix("")
|
||||
|
||||
def submit(self, operation, request):
|
||||
return self.operation2processor[operation].push(request)
|
||||
|
||||
def pickup(self, operation):
|
||||
return self.operation2processor[operation].pop()
|
||||
|
||||
|
||||
def submit_suffix(op: str):
|
||||
return "" if not op else op
|
||||
|
||||
|
||||
def pickup_suffix(op: str):
|
||||
return "pickup" if not op else f"{op}_pickup"
|
||||
@ -1,8 +0,0 @@
|
||||
import abc
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
class Packer(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
def __call__(self, data: Iterable, metadata: Iterable):
|
||||
pass
|
||||
@ -1,14 +0,0 @@
|
||||
from itertools import starmap
|
||||
from typing import Iterable
|
||||
|
||||
from pyinfra.server.packer.packer import Packer
|
||||
|
||||
|
||||
def bundle(data: bytes, metadata: dict):
|
||||
package = {"data": data, "metadata": metadata}
|
||||
return package
|
||||
|
||||
|
||||
class IdentityPacker(Packer):
|
||||
def __call__(self, data: Iterable, metadata):
|
||||
yield from starmap(bundle, zip(data, metadata))
|
||||
@ -1,9 +0,0 @@
|
||||
from typing import Iterable
|
||||
|
||||
from pyinfra.server.packer.packer import Packer
|
||||
from pyinfra.server.packing import pack_data_and_metadata_for_rest_transfer
|
||||
|
||||
|
||||
class RestPacker(Packer):
|
||||
def __call__(self, data: Iterable[bytes], metadata: Iterable[dict]):
|
||||
yield from pack_data_and_metadata_for_rest_transfer(data, metadata)
|
||||
@ -1,34 +0,0 @@
|
||||
import base64
|
||||
from _operator import itemgetter
|
||||
from itertools import starmap
|
||||
from typing import Iterable
|
||||
|
||||
from funcy import compose
|
||||
|
||||
from pyinfra.utils.func import starlift, lift
|
||||
|
||||
|
||||
def pack_data_and_metadata_for_rest_transfer(data: Iterable, metadata: Iterable):
|
||||
yield from starmap(pack, zip(data, metadata))
|
||||
|
||||
|
||||
def unpack_fn_pack(fn):
|
||||
return compose(starlift(pack), fn, lift(unpack))
|
||||
|
||||
|
||||
def pack(data: bytes, metadata: dict):
|
||||
package = {"data": bytes_to_string(data), "metadata": metadata}
|
||||
return package
|
||||
|
||||
|
||||
def unpack(package):
|
||||
data, metadata = itemgetter("data", "metadata")(package)
|
||||
return string_to_bytes(data), metadata
|
||||
|
||||
|
||||
def bytes_to_string(data: bytes) -> str:
|
||||
return base64.b64encode(data).decode()
|
||||
|
||||
|
||||
def string_to_bytes(data: str) -> bytes:
|
||||
return base64.b64decode(data.encode())
|
||||
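A quick round trip through the removed packing helpers, showing that data travels base64-encoded inside the JSON package:

from pyinfra.server.packing import pack, unpack

package = pack(b"raw bytes", {"source": "scanner"})
print(package)  # {'data': 'cmF3IGJ5dGVz', 'metadata': {'source': 'scanner'}}
assert unpack(package) == (b"raw bytes", {"source": "scanner"})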
@ -1,8 +0,0 @@
|
||||
import abc
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
class Receiver(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
def __call__(self, package: Iterable):
|
||||
pass
|
||||
@ -1,11 +0,0 @@
|
||||
from typing import Iterable
|
||||
|
||||
from pyinfra.server.receiver.receiver import Receiver
|
||||
from funcy import notnone
|
||||
|
||||
|
||||
class QueuedStreamFunctionReceiver(Receiver):
|
||||
|
||||
def __call__(self, responses: Iterable):
|
||||
for response in filter(notnone, responses):
|
||||
yield response
|
||||
@ -1,16 +0,0 @@
|
||||
from typing import Iterable
|
||||
|
||||
import requests
|
||||
from funcy import chunks, flatten
|
||||
|
||||
from pyinfra.server.receiver.receiver import Receiver
|
||||
|
||||
|
||||
class RestReceiver(Receiver):
|
||||
def __init__(self, chunk_size=3):
|
||||
self.chunk_size = chunk_size
|
||||
|
||||
def __call__(self, responses: Iterable[requests.Response]):
|
||||
for response in flatten(chunks(self.chunk_size, responses)):
|
||||
response.raise_for_status()
|
||||
yield response.json()
|
||||
@ -1,100 +0,0 @@
|
||||
from functools import singledispatch
|
||||
from typing import Dict, Callable, Union
|
||||
|
||||
from flask import Flask, jsonify, request
|
||||
from prometheus_client import generate_latest
|
||||
|
||||
from pyinfra.config import CONFIG
|
||||
from pyinfra.server.buffering.stream import FlatStreamBuffer
|
||||
from pyinfra.server.monitoring import OperationDispatcherMonitoringDecorator
|
||||
from pyinfra.server.operation_dispatcher import OperationDispatcher
|
||||
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
|
||||
|
||||
|
||||
@singledispatch
|
||||
def set_up_processing_server(arg: Union[dict, Callable], buffer_size=1):
|
||||
"""Produces a processing server given a streamable function or a mapping from operations to streamable functions.
|
||||
Streamable functions are constructed by calling pyinfra.server.utils.make_streamable_and_wrap_in_packing_logic on a
|
||||
function taking a tuple of data and metadata and also returning a tuple or yielding tuples of data and metadata.
|
||||
If the function doesn't produce data, data should be an empty byte string.
|
||||
If the function doesn't produce metadata, metadata should be an empty dictionary.
|
||||
|
||||
Args:
|
||||
arg: streamable function or mapping of operations: str to streamable functions
|
||||
buffer_size: If your function operates on batches this parameter controls how many items are aggregated before
|
||||
your function is applied.
|
||||
|
||||
TODO: buffer_size has to be controllable on per function basis.
|
||||
|
||||
Returns:
|
||||
Processing server: flask app
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@set_up_processing_server.register
|
||||
def _(operation2stream_fn: dict, buffer_size=1):
|
||||
return __stream_fn_to_processing_server(operation2stream_fn, buffer_size)
|
||||
|
||||
|
||||
@set_up_processing_server.register
|
||||
def _(stream_fn: object, buffer_size=1):
|
||||
operation2stream_fn = {None: stream_fn}
|
||||
return __stream_fn_to_processing_server(operation2stream_fn, buffer_size)
|
||||
|
||||
|
||||
def __stream_fn_to_processing_server(operation2stream_fn: dict, buffer_size):
|
||||
operation2stream_fn = {
|
||||
op: QueuedStreamFunction(FlatStreamBuffer(fn, buffer_size)) for op, fn in operation2stream_fn.items()
|
||||
}
|
||||
return __set_up_processing_server(operation2stream_fn)
|
||||
|
||||
|
||||
def __set_up_processing_server(operation2function: Dict[str, QueuedStreamFunction]):
|
||||
app = Flask(__name__)
|
||||
|
||||
dispatcher = OperationDispatcherMonitoringDecorator(
|
||||
OperationDispatcher(operation2function),
|
||||
naming_policy=naming_policy,
|
||||
)
|
||||
|
||||
def ok():
|
||||
resp = jsonify("OK")
|
||||
resp.status_code = 200
|
||||
return resp
|
||||
|
||||
@app.route("/ready", methods=["GET"])
|
||||
def ready():
|
||||
return ok()
|
||||
|
||||
@app.route("/health", methods=["GET"])
|
||||
def healthy():
|
||||
return ok()
|
||||
|
||||
@app.route("/prometheus", methods=["GET"])
|
||||
def prometheus():
|
||||
return generate_latest(registry=dispatcher.registry)
|
||||
|
||||
@app.route("/<operation>", methods=["POST", "PATCH"])
|
||||
def submit(operation):
|
||||
return dispatcher.submit(operation, request)
|
||||
|
||||
@app.route("/", methods=["POST", "PATCH"])
|
||||
def submit_default():
|
||||
return dispatcher.submit("", request)
|
||||
|
||||
@app.route("/<operation>", methods=["GET"])
|
||||
def pickup(operation):
|
||||
return dispatcher.pickup(operation)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
def naming_policy(op_name: str):
|
||||
pop_suffix = OperationDispatcher.pickup_suffix
|
||||
prefix = f"redactmanager_{CONFIG.service.name}"
|
||||
|
||||
op_display_name = op_name.replace(f"_{pop_suffix}", "") if op_name != pop_suffix else "default"
|
||||
complete_display_name = f"{prefix}_{op_display_name}"
|
||||
|
||||
return complete_display_name
|
||||
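The set_up_processing_server docstring above describes the intended wiring; below is a rough sketch under stated assumptions — the module path of set_up_processing_server and the exact per-item calling convention applied by starlift are inferred from the surrounding code, not confirmed by this diff.

from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic
from pyinfra.server.rest import set_up_processing_server  # assumed module path

def annotate(data: bytes, metadata: dict):
    # One (data, metadata) pair in, one (data, metadata) pair out;
    # return b"" / {} if the function produces no data / no metadata.
    return b"", {**metadata, "n_bytes": len(data)}

stream_fn = make_streamable_and_wrap_in_packing_logic(annotate, batched=False)
app = set_up_processing_server({"annotate": stream_fn}, buffer_size=1)
app.run(port=8080)  # POST/PATCH /annotate to submit, GET /annotate_pickup to stream results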
@ -1,21 +0,0 @@
|
||||
from funcy import first
|
||||
|
||||
from pyinfra.server.buffering.queue import stream_queue, Queue
|
||||
|
||||
|
||||
class QueuedStreamFunction:
|
||||
def __init__(self, stream_function):
|
||||
"""Combines a stream function with a queue.
|
||||
|
||||
Args:
|
||||
stream_function: Needs to operate on iterables.
|
||||
"""
|
||||
self.queue = Queue()
|
||||
self.stream_function = stream_function
|
||||
|
||||
def push(self, item):
|
||||
self.queue.append(item)
|
||||
|
||||
def pop(self):
|
||||
items = stream_queue(self.queue)
|
||||
return first(self.stream_function(items))
|
||||
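QueuedStreamFunction is the glue between the REST processor and the buffering layer; a tiny sketch of its push/pop contract using the FlatStreamBuffer removed earlier in this diff:

from pyinfra.server.buffering.stream import FlatStreamBuffer
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction

def double_batch(items):
    return [2 * item for item in items]

qsf = QueuedStreamFunction(FlatStreamBuffer(double_batch, buffer_size=1))
qsf.push(21)
print(qsf.pop())  # 42
print(qsf.pop())  # None — queue drained, nothing left to stream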
@ -1,51 +0,0 @@
|
||||
import logging
|
||||
|
||||
from flask import jsonify
|
||||
from funcy import drop
|
||||
|
||||
from pyinfra.server.nothing import Nothing
|
||||
from pyinfra.server.stream.queued_stream_function import QueuedStreamFunction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LazyRestProcessor:
|
||||
def __init__(self, queued_stream_function: QueuedStreamFunction, submit_suffix="submit", pickup_suffix="pickup"):
|
||||
self.submit_suffix = submit_suffix
|
||||
self.pickup_suffix = pickup_suffix
|
||||
self.queued_stream_function = queued_stream_function
|
||||
|
||||
def push(self, request):
|
||||
self.queued_stream_function.push(request.json)
|
||||
return jsonify(replace_suffix(request.base_url, self.submit_suffix, self.pickup_suffix))
|
||||
|
||||
def pop(self):
|
||||
result = self.queued_stream_function.pop() or Nothing
|
||||
|
||||
if not valid(result):
|
||||
logger.error(f"Received invalid result: {result}")
|
||||
result = Nothing
|
||||
|
||||
if result is Nothing:
|
||||
logger.info("Analysis completed successfully.")
|
||||
resp = jsonify("No more items left")
|
||||
resp.status_code = 204
|
||||
|
||||
else:
|
||||
logger.debug("Partial analysis completed.")
|
||||
resp = jsonify(result)
|
||||
resp.status_code = 206
|
||||
|
||||
return resp
|
||||
|
||||
|
||||
def valid(result):
|
||||
return isinstance(result, dict) or result is Nothing
|
||||
|
||||
|
||||
def replace_suffix(strn, suf, repl):
|
||||
return remove_last_n(strn, len(suf)) + repl
|
||||
|
||||
|
||||
def remove_last_n(strn, n):
|
||||
return "".join(reversed(list(drop(n, reversed(strn)))))
|
||||
@ -1,16 +0,0 @@
|
||||
from funcy import compose, identity
|
||||
|
||||
from pyinfra.server.normalization import normalize
|
||||
from pyinfra.server.packing import unpack_fn_pack
|
||||
from pyinfra.utils.func import starlift
|
||||
|
||||
|
||||
def make_streamable_and_wrap_in_packing_logic(fn, batched):
|
||||
fn = make_streamable(fn, batched)
|
||||
fn = unpack_fn_pack(fn)
|
||||
return fn
|
||||
|
||||
|
||||
def make_streamable(fn, batched):
|
||||
# FIXME: something broken with batched == True
|
||||
return compose(normalize, (identity if batched else starlift)(fn))
|
||||
@ -1,34 +0,0 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class StorageAdapter(ABC):
|
||||
def __init__(self, client):
|
||||
self.__client = client
|
||||
|
||||
@abstractmethod
|
||||
def make_bucket(self, bucket_name):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def has_bucket(self, bucket_name):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def put_object(self, bucket_name, object_name, data):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def get_object(self, bucket_name, object_name):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def get_all_objects(self, bucket_name):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def clear_bucket(self, bucket_name):
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def get_all_object_names(self, bucket_name, prefix=None):
|
||||
raise NotImplementedError
|
||||
@ -1,64 +0,0 @@
|
||||
import logging
|
||||
from operator import attrgetter
|
||||
|
||||
from azure.storage.blob import ContainerClient, BlobServiceClient
|
||||
|
||||
from pyinfra.storage.adapters.adapter import StorageAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logging.getLogger("azure").setLevel(logging.WARNING)
|
||||
logging.getLogger("urllib3").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
class AzureStorageAdapter(StorageAdapter):
|
||||
def __init__(self, client):
|
||||
super().__init__(client=client)
|
||||
self.__client: BlobServiceClient = self._StorageAdapter__client
|
||||
|
||||
def has_bucket(self, bucket_name):
|
||||
container_client = self.__client.get_container_client(bucket_name)
|
||||
return container_client.exists()
|
||||
|
||||
def __provide_container_client(self, bucket_name) -> ContainerClient:
|
||||
self.make_bucket(bucket_name)
|
||||
container_client = self.__client.get_container_client(bucket_name)
|
||||
return container_client
|
||||
|
||||
def make_bucket(self, bucket_name):
|
||||
container_client = self.__client.get_container_client(bucket_name)
|
||||
container_client if container_client.exists() else self.__client.create_container(bucket_name)
|
||||
|
||||
def put_object(self, bucket_name, object_name, data):
|
||||
logger.debug(f"Uploading '{object_name}'...")
|
||||
container_client = self.__provide_container_client(bucket_name)
|
||||
blob_client = container_client.get_blob_client(object_name)
|
||||
blob_client.upload_blob(data, overwrite=True)
|
||||
|
||||
def get_object(self, bucket_name, object_name):
|
||||
logger.debug(f"Downloading '{object_name}'...")
|
||||
container_client = self.__provide_container_client(bucket_name)
|
||||
blob_client = container_client.get_blob_client(object_name)
|
||||
blob_data = blob_client.download_blob()
|
||||
return blob_data.readall()
|
||||
|
||||
def get_all_objects(self, bucket_name):
|
||||
|
||||
container_client = self.__provide_container_client(bucket_name)
|
||||
blobs = container_client.list_blobs()
|
||||
for blob in blobs:
|
||||
logger.debug(f"Downloading '{blob.name}'...")
|
||||
blob_client = container_client.get_blob_client(blob)
|
||||
blob_data = blob_client.download_blob()
|
||||
data = blob_data.readall()
|
||||
yield data
|
||||
|
||||
def clear_bucket(self, bucket_name):
|
||||
logger.debug(f"Clearing Azure container '{bucket_name}'...")
|
||||
container_client = self.__client.get_container_client(bucket_name)
|
||||
blobs = container_client.list_blobs()
|
||||
container_client.delete_blobs(*blobs)
|
||||
|
||||
def get_all_object_names(self, bucket_name, prefix=None):
|
||||
container_client = self.__provide_container_client(bucket_name)
|
||||
blobs = container_client.list_blobs(name_starts_with=prefix)
|
||||
return map(attrgetter("name"), blobs)
|
||||
@ -1,58 +0,0 @@
|
||||
import io
|
||||
import logging
|
||||
from itertools import repeat
|
||||
from operator import attrgetter
|
||||
|
||||
from minio import Minio
|
||||
|
||||
from pyinfra.exceptions import DataLoadingFailure
|
||||
from pyinfra.storage.adapters.adapter import StorageAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class S3StorageAdapter(StorageAdapter):
|
||||
def __init__(self, client):
|
||||
super().__init__(client=client)
|
||||
self.__client: Minio = self._StorageAdapter__client
|
||||
|
||||
def make_bucket(self, bucket_name):
|
||||
if not self.has_bucket(bucket_name):
|
||||
self.__client.make_bucket(bucket_name)
|
||||
|
||||
def has_bucket(self, bucket_name):
|
||||
return self.__client.bucket_exists(bucket_name)
|
||||
|
||||
def put_object(self, bucket_name, object_name, data):
|
||||
logger.debug(f"Uploading '{object_name}'...")
|
||||
data = io.BytesIO(data)
|
||||
self.__client.put_object(bucket_name, object_name, data, length=data.getbuffer().nbytes)
|
||||
|
||||
def get_object(self, bucket_name, object_name):
|
||||
logger.debug(f"Downloading '{object_name}'...")
|
||||
response = None
|
||||
|
||||
try:
|
||||
response = self.__client.get_object(bucket_name, object_name)
|
||||
return response.data
|
||||
except Exception as err:
|
||||
raise DataLoadingFailure("Failed getting object from s3 client") from err
|
||||
finally:
|
||||
if response:
|
||||
response.close()
|
||||
response.release_conn()
|
||||
|
||||
def get_all_objects(self, bucket_name):
|
||||
for obj in self.__client.list_objects(bucket_name, recursive=True):
|
||||
logger.debug(f"Downloading '{obj.object_name}'...")
|
||||
yield self.get_object(bucket_name, obj.object_name)
|
||||
|
||||
def clear_bucket(self, bucket_name):
|
||||
logger.debug(f"Clearing S3 bucket '{bucket_name}'...")
|
||||
objects = self.__client.list_objects(bucket_name, recursive=True)
|
||||
for obj in objects:
|
||||
self.__client.remove_object(bucket_name, obj.object_name)
|
||||
|
||||
def get_all_object_names(self, bucket_name, prefix=None):
|
||||
objs = self.__client.list_objects(bucket_name, recursive=True, prefix=prefix)
|
||||
return map(attrgetter("object_name"), objs)
|
||||
@ -1,11 +0,0 @@
|
||||
from azure.storage.blob import BlobServiceClient
|
||||
|
||||
from pyinfra.config import CONFIG
|
||||
|
||||
|
||||
def get_azure_client(connection_string=None) -> BlobServiceClient:
|
||||
|
||||
if not connection_string:
|
||||
connection_string = CONFIG.storage.azure.connection_string
|
||||
|
||||
return BlobServiceClient.from_connection_string(conn_str=connection_string)
|
||||
@ -1,40 +0,0 @@
|
||||
import re
|
||||
|
||||
from minio import Minio
|
||||
|
||||
from pyinfra.config import CONFIG
|
||||
from pyinfra.exceptions import InvalidEndpoint
|
||||
|
||||
|
||||
def parse_endpoint(endpoint):
|
||||
# FIXME Greedy matching (.+) since we get random storage names on kubernetes (eg http://red-research-headless:9000)
|
||||
# FIXME this has been broken and accepts invalid URLs
|
||||
endpoint_pattern = r"(?P<protocol>https?)*(?:://)*(?P<address>(?:(?:(?:\d{1,3}\.){3}\d{1,3})|.+)(?:\:\d+)?)"
|
||||
|
||||
match = re.match(endpoint_pattern, endpoint)
|
||||
|
||||
if not match:
|
||||
raise InvalidEndpoint(f"Endpoint {endpoint} is invalid; expected {endpoint_pattern}")
|
||||
|
||||
return {"secure": match.group("protocol") == "https", "endpoint": match.group("address")}
|
||||
|
||||
|
||||
def get_s3_client(params=None) -> Minio:
|
||||
"""
|
||||
Args:
|
||||
params: dict like
|
||||
{
|
||||
"endpoint": <storage_endpoint>
|
||||
"access_key": <storage_key>
|
||||
"secret_key": <storage_secret>
|
||||
}
|
||||
"""
|
||||
if not params:
|
||||
params = CONFIG.storage.s3
|
||||
|
||||
return Minio(
|
||||
**parse_endpoint(params.endpoint),
|
||||
access_key=params.access_key,
|
||||
secret_key=params.secret_key,
|
||||
region=params.region,
|
||||
)
|
||||
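For context on the FIXMEs above, this is roughly what the removed parse_endpoint produces; as the comments warn, the permissive regex also lets malformed endpoints through:

from pyinfra.storage.clients.s3 import parse_endpoint

print(parse_endpoint("https://minio.example.com:9000"))
# {'secure': True, 'endpoint': 'minio.example.com:9000'}
print(parse_endpoint("localhost:9000"))
# {'secure': False, 'endpoint': 'localhost:9000'}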
89
pyinfra/storage/connection.py
Normal file
@ -0,0 +1,89 @@
|
||||
from functools import lru_cache
|
||||
|
||||
import requests
|
||||
from dynaconf import Dynaconf
|
||||
from kn_utils.logging import logger
|
||||
|
||||
from pyinfra.config.loader import validate_settings
|
||||
from pyinfra.config.validators import (
|
||||
multi_tenant_storage_validators,
|
||||
storage_validators,
|
||||
)
|
||||
from pyinfra.storage.storages.azure import get_azure_storage_from_settings
|
||||
from pyinfra.storage.storages.s3 import get_s3_storage_from_settings
|
||||
from pyinfra.storage.storages.storage import Storage
|
||||
from pyinfra.utils.cipher import decrypt
|
||||
|
||||
|
||||
def get_storage(settings: Dynaconf, tenant_id: str = None) -> Storage:
|
||||
"""Establishes a storage connection.
|
||||
If tenant_id is provided, gets storage connection information from the tenant server. These connections are cached.
|
||||
Otherwise, gets storage connection information from settings.
|
||||
"""
|
||||
logger.info("Establishing storage connection...")
|
||||
|
||||
if tenant_id:
|
||||
logger.info(f"Using tenant storage for {tenant_id}.")
|
||||
validate_settings(settings, multi_tenant_storage_validators)
|
||||
|
||||
return get_storage_for_tenant(
|
||||
tenant_id,
|
||||
settings.storage.tenant_server.endpoint,
|
||||
settings.storage.tenant_server.public_key,
|
||||
)
|
||||
|
||||
logger.info("Using default storage.")
|
||||
validate_settings(settings, storage_validators)
|
||||
|
||||
return storage_dispatcher[settings.storage.backend](settings)
|
||||
|
||||
|
||||
storage_dispatcher = {
|
||||
"azure": get_azure_storage_from_settings,
|
||||
"s3": get_s3_storage_from_settings,
|
||||
}
|
||||
|
||||
|
||||
@lru_cache(maxsize=10)
|
||||
def get_storage_for_tenant(tenant: str, endpoint: str, public_key: str) -> Storage:
|
||||
response = requests.get(f"{endpoint}/{tenant}").json()
|
||||
|
||||
maybe_azure = response.get("azureStorageConnection")
|
||||
maybe_s3 = response.get("s3StorageConnection")
|
||||
|
||||
assert (maybe_azure or maybe_s3) and not (maybe_azure and maybe_s3), "Only one storage backend can be used."
|
||||
|
||||
if maybe_azure:
|
||||
connection_string = decrypt(public_key, maybe_azure["connectionString"])
|
||||
backend = "azure"
|
||||
storage_info = {
|
||||
"storage": {
|
||||
"azure": {
|
||||
"connection_string": connection_string,
|
||||
"container": maybe_azure["containerName"],
|
||||
},
|
||||
}
|
||||
}
|
||||
elif maybe_s3:
|
||||
secret = decrypt(public_key, maybe_s3["secret"])
|
||||
backend = "s3"
|
||||
storage_info = {
|
||||
"storage": {
|
||||
"s3": {
|
||||
"endpoint": maybe_s3["endpoint"],
|
||||
"key": maybe_s3["key"],
|
||||
"secret": secret,
|
||||
"region": maybe_s3["region"],
|
||||
"bucket": maybe_s3["bucketName"],
|
||||
},
|
||||
}
|
||||
}
|
||||
else:
|
||||
raise Exception(f"Unknown storage backend in {response}.")
|
||||
|
||||
storage_settings = Dynaconf()
|
||||
storage_settings.update(storage_info)
|
||||
|
||||
storage = storage_dispatcher[backend](storage_settings)
|
||||
|
||||
return storage
|
||||
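A short sketch of both paths through get_storage above; the settings file, bucket name, and tenant id are placeholders, and put_object/get_object are assumed to survive on the new Storage wrapper with the same signatures as on the old one removed below.

from dynaconf import Dynaconf

from pyinfra.storage.connection import get_storage

settings = Dynaconf(settings_files=["settings.yaml"])  # provides storage.backend and storage.* keys

default_storage = get_storage(settings)                       # configured from settings
tenant_storage = get_storage(settings, tenant_id="tenant-a")  # resolved via the tenant server, cached

tenant_storage.put_object("documents", "doc.txt", b"hello")   # method names assumed, see note above
print(tenant_storage.get_object("documents", "doc.txt"))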
@ -1,44 +0,0 @@
|
||||
import logging
|
||||
|
||||
|
||||
from pyinfra.config import CONFIG
|
||||
from pyinfra.exceptions import DataLoadingFailure
|
||||
from pyinfra.storage.adapters.adapter import StorageAdapter
|
||||
from pyinfra.utils.retry import retry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(CONFIG.service.logging_level)
|
||||
|
||||
|
||||
class Storage:
|
||||
def __init__(self, adapter: StorageAdapter):
|
||||
self.__adapter = adapter
|
||||
|
||||
def make_bucket(self, bucket_name):
|
||||
self.__adapter.make_bucket(bucket_name)
|
||||
|
||||
def has_bucket(self, bucket_name):
|
||||
return self.__adapter.has_bucket(bucket_name)
|
||||
|
||||
def put_object(self, bucket_name, object_name, data):
|
||||
self.__adapter.put_object(bucket_name, object_name, data)
|
||||
|
||||
def get_object(self, bucket_name, object_name):
|
||||
return self.__get_object(bucket_name, object_name)
|
||||
|
||||
@retry(DataLoadingFailure)
|
||||
def __get_object(self, bucket_name, object_name):
|
||||
try:
|
||||
return self.__adapter.get_object(bucket_name, object_name)
|
||||
except Exception as err:
|
||||
logging.error(err)
|
||||
raise DataLoadingFailure from err
|
||||
|
||||
def get_all_objects(self, bucket_name):
|
||||
return self.__adapter.get_all_objects(bucket_name)
|
||||
|
||||
def clear_bucket(self, bucket_name):
|
||||
return self.__adapter.clear_bucket(bucket_name)
|
||||
|
||||
def get_all_object_names(self, bucket_name, prefix=None):
|
||||
return self.__adapter.get_all_object_names(bucket_name, prefix=prefix)
|
||||
@ -1,26 +0,0 @@
|
||||
from pyinfra.exceptions import UnknownStorageBackend
|
||||
from pyinfra.storage.adapters.azure import AzureStorageAdapter
|
||||
from pyinfra.storage.adapters.s3 import S3StorageAdapter
|
||||
from pyinfra.storage.clients.azure import get_azure_client
|
||||
from pyinfra.storage.clients.s3 import get_s3_client
|
||||
from pyinfra.storage.storage import Storage
|
||||
|
||||
|
||||
def get_azure_storage(config=None):
|
||||
return Storage(AzureStorageAdapter(get_azure_client(config)))
|
||||
|
||||
|
||||
def get_s3_storage(config=None):
|
||||
return Storage(S3StorageAdapter(get_s3_client(config)))
|
||||
|
||||
|
||||
def get_storage(storage_backend):
|
||||
|
||||
if storage_backend == "s3":
|
||||
storage = get_s3_storage()
|
||||
elif storage_backend == "azure":
|
||||
storage = get_azure_storage()
|
||||
else:
|
||||
raise UnknownStorageBackend(f"Unknown storage backend '{storage_backend}'.")
|
||||
|
||||
return storage
|
||||
Some files were not shown because too many files have changed in this diff.