Add monitoring with Grafana and Prometheus
This commit is contained in:
parent
2117e2a294
commit
597043bb38
@ -8,6 +8,7 @@ service:
|
||||
batch_size: $BATCH_SIZE|2 # Number of images in memory simultaneously
|
||||
verbose: $VERBOSE|True # Service prints document processing progress to stdout
|
||||
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the MLflow run to load the model from
|
||||
monitoring_enabled: $MONITORING_ENABLED|True # Whether the service updates its monitoring metrics (processed-file counter, memory usage) on each prediction
|
||||
|
||||
|
||||
# These variables control filters that are applied to either images, image metadata or model predictions. The filter
|
||||
|
||||
@ -19,3 +19,5 @@ PDFNetPython3~=9.1.0
|
||||
Pillow~=8.3.2
|
||||
PyYAML~=5.4.1
|
||||
scikit_learn~=0.24.2
|
||||
prometheus-client==0.13.1
|
||||
prometheus_flask_exporter==0.19.0
|
||||
17
src/serve.py
17
src/serve.py
@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import tempfile
|
||||
import tracemalloc
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from waitress import serve
|
||||
@ -7,14 +8,21 @@ from waitress import serve
|
||||
from image_prediction.config import CONFIG
|
||||
from image_prediction.predictor import Predictor, extract_image_metadata_pairs, classify_images
|
||||
from image_prediction.response import build_response
|
||||
from prometheus_client import Gauge, Counter
|
||||
from prometheus_flask_exporter import PrometheusMetrics
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
predictor = Predictor()
|
||||
logging.info("Predictor ready.")
|
||||
tracemalloc.start()
|
||||
|
||||
app = Flask(__name__)
|
||||
metrics = PrometheusMetrics(app=app, path='/prometheus')
|
||||
|
||||
file_counter = Counter("image_prediction_file_counter", "count processed files")
|
||||
ram_metric = Gauge("image_prediction_memory_usage", "Memory usage in Mb")
|
||||
|
||||
@app.route("/ready", methods=["GET"])
|
||||
def ready():
|
||||
@ -29,7 +37,13 @@ def main():
|
||||
return resp
|
||||
|
||||
@app.route("/", methods=["POST"])
|
||||
@metrics.summary('image_prediction_request_time_seconds', 'Time spent processing request')
|
||||
def predict():
|
||||
def do_monitoring():
|
||||
file_counter.inc()
|
||||
_, peak = tracemalloc.get_traced_memory()
|
||||
ram_metric.set(peak / 10 ** 6)
|
||||
|
||||
pdf = request.data
|
||||
|
||||
logging.debug("Running predictor on document...")
|
||||
@ -38,6 +52,8 @@ def main():
|
||||
image_metadata_pairs = extract_image_metadata_pairs(tmp_file.name)
|
||||
try:
|
||||
predictions, metadata = classify_images(predictor, image_metadata_pairs)
|
||||
if CONFIG.service.monitoring_enabled:
|
||||
do_monitoring()
|
||||
except Exception as err:
|
||||
logging.warning("Analysis failed.")
|
||||
logging.exception(err)
|
||||
@ -59,6 +75,7 @@ def run_prediction_server(app, mode="development"):
|
||||
app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True)
|
||||
elif mode == "production":
|
||||
serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port)
|
||||
tracemalloc.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user