rancher ready

This commit is contained in:
Julius Unverfehrt 2022-02-15 09:38:16 +01:00
parent 2a76639049
commit 93d899c83c
5 changed files with 204 additions and 72 deletions

View File

@ -1,4 +1,4 @@
FROM python:3.8 as builder
FROM python:3.8
# Use a virtual environment.
RUN python -m venv /app/venv
@ -11,6 +11,7 @@ RUN python -m pip install --upgrade pip
WORKDIR /app/service
COPY . ./
# Install module & dependencies
RUN python3 -m pip install -e .
RUN python3 -m pip install -r requirements.txt

View File

@ -7,51 +7,12 @@ rabbitmq:
heartbeat: $RABBITMQ_HEARTBEAT|7200 # Controls AMQP heartbeat timeout in seconds
queues: # Names of queues for...
input: image_request_queue # requests to service
output: image_response_queue # responses by service
dead_letter: image_dead_letter_queue # messages that failed to process
prefetch_count: 1
retry: # Controls retry behaviour for messages whose processing failed
enabled: $RETRY|True # Toggles retry behaviour
max_attempts: $MAX_ATTEMPTS|3 # Number of times a message may fail before being published to dead letter queue
# TODO: implement
max_interval: $MAX_INTERVAL|15000 # Increase timeout for a message every time it fails to a maximum of this value
minio:
host: $STORAGE_ENDPOINT|localhost # MinIO host address
port: $STORAGE_PORT|9000 # MinIO host port
user: $STORAGE_KEY|root # MinIO user name
password: $STORAGE_SECRET|password # MinIO user password
bucket: $STORAGE_BUCKET_NAME|redaction # MinIO bucket
azure_blob_storage:
connection_string: $STORAGE_AZURECONNECTIONSTRING|"DefaultEndpointsProtocol=https;AccountName=iqserdevelopment;AccountKey=4imAbV9PYXaztSOMpIyAClg88bAZCXuXMGJG0GA1eIBpdh2PlnFGoRBnKqLy2YZUSTmZ3wJfC7tzfHtuC6FEhQ==;EndpointSuffix=core.windows.net"
container: $STORAGE_AZURECONTAINERNAME|"image-service-v2-test-data"
sanic:
host: $SANIC_HOST|"0.0.0.0" # Sanic webserver host address
process_host: $SANIC_PROCESS_HOST|"127.0.0.1" # Sanic webserver host address for individual service processes
port: $SANIC_PORT|8080 # Sanic webserver host port
check_quantifier: $CHECK_QUANTIFIER|any # Whether all or any service instance needs to pass all checks for a passed master check
cache: false # Whether to cache readiness and health check results
logging_level_sanic: $LOGGING_LEVEL_SANIC|WARNING
input: mini_request_queue # requests to service
output: mini_response_queue # responses by service
dead_letter: mini_letter_queue # messages that failed to process
service:
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for log file messages
logfile_path: $LOGFILE_PATH|null # Overwrites the default path for the service logfile (image_service/log.log)
verbose: $VERBOSE|True # Service workers print document processing progress to stdout
assert_gpu: $ASSERT_GPU|False # Whether to make a working GPU a mandatory readiness condition
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from
n_instances: $CONCURRENCY|1 # Number of service top loops that run in parallel (processes, not threads!)
name: $SERVICE_NAME|image-service-v2 # Name of the service in the kubernetes cluster
storage_backend: $STORAGE_BACKEND|s3 # The storage to pull files to be processed from
model_cache_file: "/root/.keras/models/efficientnetb0_notop.h5" # Workaround to intercept auto-download if model is not cached
batch_size: $BATCH_SIZE|32 # Number of images in memory simultaneously per service instance
minimum_free_memory_percentage: $MINIMUM_FREE_MEMORY_PERCENTAGE|.3 # Minimum allowed percentage of free memory
available_memory: $AVAILABLE_MEMORY|6000 # Available memory in MB
monitor_memory_usage: $MONITOR_MEMORY_USAGE|True # Whether to monitor the memory usage and kill the process when memory is insufficient
pdftron_license_key: "Knecon AG(en.knecon.swiss):OEM:DDA-R::WL+:AMS(20211029):BECC974307DAB4F34B513BC9B2531B24496F6FCB83CD8AC574358A959730B622FABEF5C7"
name: $SERVICE_NAME|mini-queue-service-v1 # Name of the service in the kubernetes cluster

View File

@ -0,0 +1,162 @@
apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
meta.helm.sh/release-name: red-research
meta.helm.sh/release-namespace: red-research
labels:
apiVersion: v2
app: image-service
app.kubernetes.io/instance: red-research
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: redaction
helm.sh/chart: redaction
io.cattle.field/appId: red-research
type: service
name: mini-queue
namespace: red-research
spec:
selector:
matchLabels:
apiVersion: v2
app: image-service
io.cattle.field/appId: red-research
template:
metadata:
annotations:
prometheus.io/path: /prometheus
prometheus.io/port: "8080"
prometheus.io/scrape: "true"
labels:
apiVersion: v2
app: image-service
io.cattle.field/appId: red-research
spec:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchExpressions:
- key: app
operator: In
values:
- image-service
topologyKey: kubernetes.io/hostname
weight: 100
automountServiceAccountToken: false
containers:
- env:
- name: BATCH_SIZE
value: "32"
- name: CONCURRENCY
value: "1"
- name: LOGGING_LEVEL_ROOT
value: DEBUG
- name: MAX_IMAGE_FORMAT
value: "10"
- name: MAX_REL_IMAGE_SIZE
value: "0.75"
- name: MINIMUM_FREE_MEMORY_PERCENTAGE
value: "0.3"
- name: MIN_IMAGE_FORMAT
value: "0.1"
- name: MIN_REL_IMAGE_SIZE
value: "0.05"
- name: MONITORING_ENABLED
value: "true"
- name: MONITOR_MEMORY_USAGE
value: "true"
- name: RABBITMQ_HEARTBEAT
value: "7200"
- name: RABBITMQ_HOST
value: red-research-rabbitmq
- name: RABBITMQ_USERNAME
value: user
- name: RUN_ID
value: fabfb1f192c745369b88cab34471aba7
- name: STORAGE_BUCKET_NAME
value: redaction
- name: STORAGE_ENDPOINT
value: red-research-minio-headless
- name: VERBOSE
value: "true"
- name: RABBITMQ_PASSWORD
valueFrom:
secretKeyRef:
key: rabbitmq-password
name: red-research-rabbitmq
optional: false
- name: STORAGE_KEY
valueFrom:
secretKeyRef:
key: root-user
name: red-research-minio
optional: false
- name: STORAGE_SECRET
valueFrom:
secretKeyRef:
key: root-password
name: red-research-minio
optional: false
envFrom:
- configMapRef:
name: storage-backend
optional: false
image: nexus.iqser.com:5001/red/mini-queue-service-v1:latest
imagePullPolicy: Always
name: mini-queue
ports:
- containerPort: 8080
name: http
protocol: TCP
resources:
limits:
cpu: "2"
memory: 4000Mi
requests:
cpu: "1"
memory: 2000Mi
securityContext:
allowPrivilegeEscalation: false
capabilities: {}
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
volumeMounts:
- mountPath: /tmp
name: tmp
- mountPath: /app/service/incl/redai_image/data/tmp
name: data-tmp
- mountPath: /app/service/incl/image_service/data/mlruns/.trash
name: trash
imagePullSecrets:
- name: nexus
initContainers:
- command:
- sh
- -c
- until nc -z -w 10 red-research-rabbitmq 5672; do echo waiting for rabbitmq;
done; echo rabbitmq found
image: nexus.iqser.com:5001/infra/busybox:1.33.1
imagePullPolicy: Always
name: init-rabbitmq
resources:
limits:
cpu: 100m
memory: 128Mi
requests:
cpu: 50m
memory: 64Mi
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
volumes:
- emptyDir: {}
name: tmp
- emptyDir: {}
name: data-tmp
- emptyDir: {}
name: trash

View File

@ -1,16 +0,0 @@
import pika
import sys
from retry import retry
from mini_queue.utils.config import CONFIG
def init_params():
    """Build the pika connection parameters for the configured RabbitMQ broker.

    User, password, host, port and heartbeat are all read from
    ``CONFIG.rabbitmq``.

    Returns:
        The ``pika.ConnectionParameters`` built from those values.
    """
    rabbitmq = CONFIG.rabbitmq
    auth = pika.PlainCredentials(rabbitmq.user, rabbitmq.password)
    return pika.ConnectionParameters(
        host=rabbitmq.host,
        port=rabbitmq.port,
        heartbeat=rabbitmq.heartbeat,
        credentials=auth,
    )

View File

@ -1,5 +1,4 @@
from mini_queue.consumer import init_params
from mini_queue.utils.config import CONFIG
import logging
import sys
import pika
from retry import retry
@ -8,28 +7,53 @@ from mini_queue.utils.config import CONFIG
def callback(channel, method, properties, body):
    """Echo a consumed message to the output queue and acknowledge it.

    Args:
        channel: Channel the message arrived on; used to publish and to ack.
        method: Delivery metadata; only ``delivery_tag`` is read.
        properties: Message properties (unused).
        body: Message payload; forwarded unchanged as the response.
    """
    # Imported locally so the fix does not depend on the module's import block.
    # The previous ``sys.sleep(1)`` raised AttributeError on every message
    # because the ``sys`` module has no ``sleep`` function.
    import time

    print("Received %r" % body)
    logging.info(" [R] Received %r" % body)
    time.sleep(1)
    response = body
    channel.basic_publish(exchange="", routing_key=CONFIG.rabbitmq.queues.output, body=response)
    # Ack only after the response was published, so a failed publish leaves
    # the message unacknowledged.
    channel.basic_ack(delivery_tag=method.delivery_tag)
if __name__ == "__main__":
def init_params():
    """Assemble RabbitMQ connection parameters from the service configuration.

    Returns:
        A ``pika.ConnectionParameters`` carrying host, port, heartbeat and
        plain user/password credentials taken from ``CONFIG.rabbitmq``.
    """
    auth = pika.PlainCredentials(CONFIG.rabbitmq.user, CONFIG.rabbitmq.password)
    connection_kwargs = {
        "host": CONFIG.rabbitmq.host,
        "port": CONFIG.rabbitmq.port,
        "heartbeat": CONFIG.rabbitmq.heartbeat,
        "credentials": auth,
    }
    return pika.ConnectionParameters(**connection_kwargs)
print("startet happy pikachu!")
def main():
    """Consume the input queue until a channel error occurs.

    A connection and channel are opened, the output queue is declared and
    ``callback`` is registered on the input queue. Connection-level failures
    (including a close initiated by the broker) trigger a reconnect after a
    short pause; channel-level errors end the loop.
    """
    # Imported locally so the reconnect pause does not depend on the module's
    # import block.
    import time

    reconnect_delay_seconds = 1

    logging.info(" [S] Startet happy pikachu!")
    parameters = init_params()

    while True:
        try:
            # Open a fresh connection and channel on every attempt. Retrying
            # on the previous, already closed connection cannot recover.
            connection = pika.BlockingConnection(parameters)
            channel = connection.channel()
            channel.queue_declare(queue=CONFIG.rabbitmq.queues.output, durable=True)
            channel.basic_consume(queue=CONFIG.rabbitmq.queues.input, auto_ack=False, on_message_callback=callback)
            print(" [*] Waiting for messages. To exit press CTRL+C")
            logging.info(" [*] Waiting for messages. To exit press CTRL+C")
            channel.start_consuming()
            # start_consuming() returned without raising: release this
            # connection before the next iteration opens a new one.
            connection.close()
        except pika.exceptions.ConnectionClosedByBroker as err:
            logging.info("Connection was closed by broker: {}, retrying...".format(err))
        except pika.exceptions.AMQPChannelError as err:
            logging.warning("Caught a channel error: {}, stopping...".format(err))
            break
        except pika.exceptions.AMQPConnectionError:
            logging.info("Connection was closed, retrying...")
        # Pause between attempts so an unreachable broker does not cause a
        # busy reconnect loop.
        time.sleep(reconnect_delay_seconds)
if __name__ == "__main__":
    # Set the root logging level from the service configuration, then start
    # the consumer loop.
    logging.basicConfig(level=CONFIG.service.logging_level)
    main()