Add monitoring with Grafana and Prometheus
This commit is contained in:
parent
2117e2a294
commit
597043bb38
@ -8,6 +8,7 @@ service:
|
|||||||
batch_size: $BATCH_SIZE|2 # Number of images in memory simultaneously
|
batch_size: $BATCH_SIZE|2 # Number of images in memory simultaneously
|
||||||
verbose: $VERBOSE|True # Service prints document processing progress to stdout
|
verbose: $VERBOSE|True # Service prints document processing progress to stdout
|
||||||
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from
|
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from
|
||||||
|
monitoring_enabled: $MONITORING_ENABLED|True # Whether the service updates its Prometheus file counter and memory-usage gauge after each prediction
|
||||||
|
|
||||||
|
|
||||||
# These variables control filters that are applied to either images, image metadata or model predictions. The filter
|
# These variables control filters that are applied to either images, image metadata or model predictions. The filter
|
||||||
|
|||||||
@ -19,3 +19,5 @@ PDFNetPython3~=9.1.0
|
|||||||
Pillow~=8.3.2
|
Pillow~=8.3.2
|
||||||
PyYAML~=5.4.1
|
PyYAML~=5.4.1
|
||||||
scikit_learn~=0.24.2
|
scikit_learn~=0.24.2
|
||||||
|
prometheus-client==0.13.1
|
||||||
|
prometheus_flask_exporter==0.19.0
|
||||||
17
src/serve.py
17
src/serve.py
@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import tracemalloc
|
||||||
|
|
||||||
from flask import Flask, request, jsonify
|
from flask import Flask, request, jsonify
|
||||||
from waitress import serve
|
from waitress import serve
|
||||||
@ -7,14 +8,21 @@ from waitress import serve
|
|||||||
from image_prediction.config import CONFIG
|
from image_prediction.config import CONFIG
|
||||||
from image_prediction.predictor import Predictor, extract_image_metadata_pairs, classify_images
|
from image_prediction.predictor import Predictor, extract_image_metadata_pairs, classify_images
|
||||||
from image_prediction.response import build_response
|
from image_prediction.response import build_response
|
||||||
|
from prometheus_client import Gauge, Counter
|
||||||
|
from prometheus_flask_exporter import PrometheusMetrics
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
predictor = Predictor()
|
predictor = Predictor()
|
||||||
logging.info("Predictor ready.")
|
logging.info("Predictor ready.")
|
||||||
|
tracemalloc.start()
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
metrics = PrometheusMetrics(app=app, path='/prometheus')
|
||||||
|
|
||||||
|
file_counter = Counter("image_prediction_file_counter", "count processed files")
|
||||||
|
ram_metric = Gauge("image_prediction_memory_usage", "Memory usage in Mb")
|
||||||
|
|
||||||
@app.route("/ready", methods=["GET"])
|
@app.route("/ready", methods=["GET"])
|
||||||
def ready():
|
def ready():
|
||||||
@ -29,7 +37,13 @@ def main():
|
|||||||
return resp
|
return resp
|
||||||
|
|
||||||
@app.route("/", methods=["POST"])
|
@app.route("/", methods=["POST"])
|
||||||
|
@metrics.summary('image_prediction_request_time_seconds', 'Time spent processing request')
|
||||||
def predict():
|
def predict():
|
||||||
|
def do_monitoring():
|
||||||
|
file_counter.inc()
|
||||||
|
_, peak = tracemalloc.get_traced_memory()
|
||||||
|
ram_metric.set(peak / 10 ** 6)
|
||||||
|
|
||||||
pdf = request.data
|
pdf = request.data
|
||||||
|
|
||||||
logging.debug("Running predictor on document...")
|
logging.debug("Running predictor on document...")
|
||||||
@ -38,6 +52,8 @@ def main():
|
|||||||
image_metadata_pairs = extract_image_metadata_pairs(tmp_file.name)
|
image_metadata_pairs = extract_image_metadata_pairs(tmp_file.name)
|
||||||
try:
|
try:
|
||||||
predictions, metadata = classify_images(predictor, image_metadata_pairs)
|
predictions, metadata = classify_images(predictor, image_metadata_pairs)
|
||||||
|
if CONFIG.service.monitoring_enabled:
|
||||||
|
do_monitoring()
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logging.warning("Analysis failed.")
|
logging.warning("Analysis failed.")
|
||||||
logging.exception(err)
|
logging.exception(err)
|
||||||
@ -59,6 +75,7 @@ def run_prediction_server(app, mode="development"):
|
|||||||
app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True)
|
app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True)
|
||||||
elif mode == "production":
|
elif mode == "production":
|
||||||
serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port)
|
serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port)
|
||||||
|
tracemalloc.stop()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user