Add monitoring with Grafana and Prometheus

This commit is contained in:
cdietrich 2022-03-15 13:14:48 +01:00
parent 2117e2a294
commit 597043bb38
3 changed files with 20 additions and 0 deletions

View File

@ -8,6 +8,7 @@ service:
batch_size: $BATCH_SIZE|2 # Number of images in memory simultaneously
verbose: $VERBOSE|True # Service prints document processing progress to stdout
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from
monitoring_enabled: $MONITORING_ENABLED|True # Whether the service collects and exposes Prometheus monitoring metrics
# These variables control filters that are applied to either images, image metadata or model predictions. The filter

View File

@ -19,3 +19,5 @@ PDFNetPython3~=9.1.0
Pillow~=8.3.2
PyYAML~=5.4.1
scikit_learn~=0.24.2
prometheus-client==0.13.1
prometheus_flask_exporter==0.19.0

View File

@ -1,5 +1,6 @@
import logging
import tempfile
import tracemalloc
from flask import Flask, request, jsonify
from waitress import serve
@ -7,14 +8,21 @@ from waitress import serve
from image_prediction.config import CONFIG
from image_prediction.predictor import Predictor, extract_image_metadata_pairs, classify_images
from image_prediction.response import build_response
from prometheus_client import Gauge, Counter
from prometheus_flask_exporter import PrometheusMetrics
def main():
predictor = Predictor()
logging.info("Predictor ready.")
tracemalloc.start()
app = Flask(__name__)
metrics = PrometheusMetrics(app=app, path='/prometheus')
file_counter = Counter("image_prediction_file_counter", "count processed files")
ram_metric = Gauge("image_prediction_memory_usage", "Memory usage in Mb")
@app.route("/ready", methods=["GET"])
def ready():
@ -29,7 +37,13 @@ def main():
return resp
@app.route("/", methods=["POST"])
@metrics.summary('image_prediction_request_time_seconds', 'Time spent processing request')
def predict():
def do_monitoring():
    """Record per-request monitoring metrics.

    Increments the processed-file counter and publishes the peak traced
    memory usage (in MB) to the Prometheus gauge. Relies on the enclosing
    scope's ``file_counter`` / ``ram_metric`` metrics and on
    ``tracemalloc.start()`` having been called earlier in ``main()``.
    """
    file_counter.inc()
    # tracemalloc returns (current, peak) byte counts; only peak is reported
    _, peak = tracemalloc.get_traced_memory()
    # Convert bytes -> megabytes before setting the gauge
    ram_metric.set(peak / 10 ** 6)
pdf = request.data
logging.debug("Running predictor on document...")
@ -38,6 +52,8 @@ def main():
image_metadata_pairs = extract_image_metadata_pairs(tmp_file.name)
try:
predictions, metadata = classify_images(predictor, image_metadata_pairs)
if CONFIG.service.monitoring_enabled:
do_monitoring()
except Exception as err:
logging.warning("Analysis failed.")
logging.exception(err)
@ -59,6 +75,7 @@ def run_prediction_server(app, mode="development"):
app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True)
elif mode == "production":
serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port)
tracemalloc.stop()
if __name__ == "__main__":