From 597043bb38f8a880255676ef40c6bbf1936aace5 Mon Sep 17 00:00:00 2001 From: cdietrich Date: Tue, 15 Mar 2022 13:14:48 +0100 Subject: [PATCH] add monitoring with grafana and prometheus --- config.yaml | 1 + requirements.txt | 2 ++ src/serve.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/config.yaml b/config.yaml index c48b4d6..73a1f08 100644 --- a/config.yaml +++ b/config.yaml @@ -8,6 +8,7 @@ service: batch_size: $BATCH_SIZE|2 # Number of images in memory simultaneously verbose: $VERBOSE|True # Service prints document processing progress to stdout run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from + monitoring_enabled: $MONITORING_ENABLED|True # Whether the service records and exposes monitoring metrics # These variables control filters that are applied to either images, image metadata or model predictions. The filter diff --git a/requirements.txt b/requirements.txt index d20ead3..3646261 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,5 @@ PDFNetPython3~=9.1.0 Pillow~=8.3.2 PyYAML~=5.4.1 scikit_learn~=0.24.2 +prometheus-client==0.13.1 +prometheus_flask_exporter==0.19.0 \ No newline at end of file diff --git a/src/serve.py b/src/serve.py index bc6bae2..94a41c6 100644 --- a/src/serve.py +++ b/src/serve.py @@ -1,5 +1,6 @@ import logging import tempfile +import tracemalloc from flask import Flask, request, jsonify from waitress import serve @@ -7,14 +8,21 @@ from waitress import serve from image_prediction.config import CONFIG from image_prediction.predictor import Predictor, extract_image_metadata_pairs, classify_images from image_prediction.response import build_response +from prometheus_client import Gauge, Counter +from prometheus_flask_exporter import PrometheusMetrics def main(): predictor = Predictor() logging.info("Predictor ready.") + tracemalloc.start() app = Flask(__name__) + metrics = PrometheusMetrics(app=app, path='/prometheus') + + file_counter = Counter("image_prediction_file_counter", 
"count processed files") + ram_metric = Gauge("image_prediction_memory_usage", "Memory usage in Mb") @app.route("/ready", methods=["GET"]) def ready(): @@ -29,7 +37,13 @@ def main(): return resp @app.route("/", methods=["POST"]) + @metrics.summary('image_prediction_request_time_seconds', 'Time spent processing request') def predict(): + def do_monitoring(): + file_counter.inc() + _, peak = tracemalloc.get_traced_memory() + ram_metric.set(peak / 10 ** 6) + pdf = request.data logging.debug("Running predictor on document...") @@ -38,6 +52,8 @@ def main(): image_metadata_pairs = extract_image_metadata_pairs(tmp_file.name) try: predictions, metadata = classify_images(predictor, image_metadata_pairs) + if CONFIG.service.monitoring_enabled: + do_monitoring() except Exception as err: logging.warning("Analysis failed.") logging.exception(err) @@ -59,6 +75,7 @@ def run_prediction_server(app, mode="development"): app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True) elif mode == "production": serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port) + tracemalloc.stop() if __name__ == "__main__":