RES-731: add queues per tenant

This commit is contained in:
Jonathan Kössler 2024-08-19 15:12:03 +02:00
parent 443c2614f9
commit 278f54eaa7
9 changed files with 36238 additions and 1585 deletions

View File

@ -5,4 +5,4 @@
url = ssh://vector.iqser.com/research/image-prediction/
port = 22
['remote "azure_remote"']
url = azure://ic-sa-dvc/
url = azure://image-classification-dvc/

View File

@ -1,31 +1,53 @@
include:
- project: "Gitlab/gitlab"
ref: 0.3.0
file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"
ref: main
file: "/ci-templates/research/dvc.gitlab-ci.yml"
- project: "Gitlab/gitlab"
ref: main
file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
variables:
NEXUS_PROJECT_DIR: red
IMAGENAME: "${CI_PROJECT_NAME}"
INTEGRATION_TEST_FILE: "${CI_PROJECT_ID}.pdf"
FF_USE_FASTZIP: "true" # enable fastzip - a faster zip implementation that also supports level configuration.
ARTIFACT_COMPRESSION_LEVEL: default # can also be set to fastest, fast, slow and slowest. If just enabling fastzip is not enough try setting this to fastest or fast.
CACHE_COMPRESSION_LEVEL: default # same as above, but for caches
# TRANSFER_METER_FREQUENCY: 5s # will display transfer progress every 5 seconds for artifacts and remote caches. For debugging purposes.
stages:
- data
- setup
- tests
- sonarqube
- versioning
- build
- integration-tests
- release
docker-build:
extends: .docker-build
script:
- !reference [.docker-kaniko-build-config, script]
- !reference [.docker-kaniko-build-script, script]
needs:
- job: dvc-pull
artifacts: true
#################################
# temp. disable integration tests, b/c they don't cover the CV analysis case yet
trigger integration tests:
###################
# INTEGRATION TESTS
trigger-integration-tests:
extends: .integration-tests
# ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
# needs:
# - job: docker-build::model_name
# artifacts: true
rules:
- when: never
release build:
stage: release
#########
# RELEASE
release:
extends: .release
needs:
- job: set custom version
artifacts: true
optional: true
- job: calculate patch version
artifacts: true
optional: true
- job: calculate minor version
artifacts: true
optional: true
- job: build docker nexus
artifacts: true
#################################
- !reference [.needs-versioning, needs] # leave this line as is

View File

@ -1,11 +1,17 @@
FROM python:3.10
FROM python:3.10-slim AS builder
ARG GITLAB_USER
ARG GITLAB_ACCESS_TOKEN
ARG USERNAME
ARG TOKEN
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
ARG POETRY_SOURCE_REF_RED=gitlab-red
ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
ARG VERSION=dev
LABEL maintainer="Research <research@knecon.com>"
@ -13,27 +19,55 @@ LABEL version="${VERSION}"
WORKDIR /app
###########
# ENV SETUP
ENV PYTHONDONTWRITEBYTECODE=true
ENV PYTHONUNBUFFERED=true
ENV POETRY_HOME=/opt/poetry
ENV PATH="$POETRY_HOME/bin:$PATH"
RUN apt-get update && \
apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN curl -sSL https://install.python-poetry.org | python3 -
RUN poetry --version
COPY ./data ./data
COPY ./config ./config
COPY ./src ./src
COPY pyproject.toml poetry.lock banner.txt ./
COPY pyproject.toml poetry.lock ./
RUN poetry config virtualenvs.create false && \
RUN poetry config virtualenvs.create true && \
poetry config virtualenvs.in-project true && \
poetry config installer.max-workers 10 && \
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${USERNAME} ${TOKEN} && \
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${USERNAME} ${TOKEN} && \
poetry install --without=dev -vv --no-interaction
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry install --without=dev -vv --no-interaction --no-root
###############
# WORKING IMAGE
FROM python:3.10-slim
WORKDIR /app
# COPY SOURCE CODE FROM BUILDER IMAGE
COPY --from=builder /app /app
# COPY BILL OF MATERIALS (BOM)
COPY bom.json /bom.json
ENV PATH="/app/.venv/bin:$PATH"
###################
# COPY SOURCE CODE
COPY ./src ./src
COPY ./config ./config
COPY ./data ./data
COPY banner.txt ./
EXPOSE 5000
EXPOSE 8080
CMD [ "python", "src/serve.py"]

33697
bom.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,17 @@
[dynamic_tenant_queues]
enabled = true
[metrics.prometheus]
enabled = true
prefix = "redactmanager_image_service"
[tracing.opentelemetry]
[tracing]
enabled = true
# possible values "opentelemetry" | "azure_monitor" (Expects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
type = "azure_monitor"
[tracing.opentelemetry]
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
service_name = "redactmanager_image_service"
exporter = "otlp"
@ -25,6 +33,16 @@ input_queue = "request_queue"
output_queue = "response_queue"
dead_letter_queue = "dead_letter_queue"
tenant_event_queue_suffix = "_tenant_event_queue"
tenant_event_dlq_suffix = "_tenant_events_dlq"
tenant_exchange_name = "tenants-exchange"
queue_expiration_time = 300000 # 5 minutes in milliseconds
service_request_queue_prefix = "image_request_queue"
service_request_exchange_name = "image_request_exchange"
service_response_exchange_name = "image_response_exchange"
service_dlq_name = "image_dlq"
[storage]
backend = "s3"
@ -41,4 +59,7 @@ connection_string = ""
[storage.tenant_server]
public_key = ""
endpoint = "http://tenant-user-management:8081/internal-api/tenants"
endpoint = "http://tenant-user-management:8081/internal-api/tenants"
[kubernetes]
pod_name = "test_pod"

3961
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "image-classification-service"
version = "2.4.0"
version = "2.4.1"
description = ""
authors = ["Team Research <research@knecon.com>"]
readme = "README.md"
@ -8,7 +8,7 @@ packages = [{ include = "image_prediction", from = "src" }]
[tool.poetry.dependencies]
python = ">=3.10,<3.11"
pyinfra = { version = "2.1.0", source = "gitlab-research" }
pyinfra = { version = "3.2.2", source = "gitlab-research" }
kn-utils = { version = "0.2.7", source = "gitlab-research" }
dvc = "^2.34.0"
dvc-ssh = "^2.20.0"
@ -32,11 +32,11 @@ coverage = "^6.3.2"
Pillow = "^9.1.0"
pdf2image = "^1.16.0"
frozendict = "^2.3.0"
protobuf = "^3.20.0"
fsspec = "^2022.11.0"
PyMonad = "^2.4.0"
pdfnetpython3 = "9.4.2"
loguru = "^0.7.0"
cyclonedx-bom = "^4.5.0"
[tool.poetry.group.dev.dependencies]
pytest = "^7.0.1"

View File

@ -1,6 +1,6 @@
docker build -t image-clsasification-service:$(poetry version -s)-dev \
docker build --platform linux/amd64 -t image-classification-service:$(poetry version -s)-dev \
-f Dockerfile \
--build-arg USERNAME=$GITLAB_USER \
--build-arg TOKEN=$GITLAB_ACCESS_TOKEN \
--build-arg GITLAB_USER=$GITLAB_USER \
--build-arg GITLAB_ACCESS_TOKEN=$GITLAB_ACCESS_TOKEN \
. && \
docker run -it --rm image-classification-service:$(poetry version -s)-dev

View File

@ -35,7 +35,7 @@ class ParsablePDFImageExtractor(ImageExtractor):
tolerance: The tolerance in pixels for the distance between images, beyond which they will not be stitched
together
"""
self.doc: fitz.fitz.Document = None
self.doc: fitz.Document = None
self.verbose = verbose
self.tolerance = tolerance
@ -48,7 +48,7 @@ class ParsablePDFImageExtractor(ImageExtractor):
yield from image_metadata_pairs
def __process_images_on_page(self, page: fitz.fitz.Page):
def __process_images_on_page(self, page: fitz.Page):
metadata = extract_valid_metadata(self.doc, page)
images = get_images_on_page(self.doc, metadata)
@ -130,7 +130,7 @@ def get_images_on_page(doc, metadata):
yield from images
def extract_valid_metadata(doc: fitz.fitz.Document, page: fitz.fitz.Page):
def extract_valid_metadata(doc: fitz.Document, page: fitz.Page):
metadata = get_metadata_for_images_on_page(page)
metadata = filter_valid_metadata(metadata)
metadata = add_alpha_channel_info(doc, metadata)
@ -191,7 +191,7 @@ def xref_to_image(doc, xref) -> Union[Image.Image, None]:
return
def convert_pixmap_to_array(pixmap: fitz.fitz.Pixmap):
def convert_pixmap_to_array(pixmap: fitz.Pixmap):
array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
array = _normalize_channels(array)
return array