# cv-analysis-service/src/run_service.py
#
# 156 lines
# 5.3 KiB
# Python

import json
import tracemalloc
from sys import getsizeof
import logging
from flask import Flask, request, jsonify
from prometheus_client import Counter, Gauge
from prometheus_flask_exporter import PrometheusMetrics
from waitress import serve
from cv_analysis.utils import npconvert
from cv_analysis.table_parsing import parse_table
from cv_analysis.redaction_detection import find_redactions
from cv_analysis.layout_parsing import parse_layout
from cv_analysis.figure_detection import detect_figures
from cv_analysis.utils.logging import logger
from cv_analysis.utils.preprocessing import open_pdf
from cv_analysis.config import CONFIG
def suppress_user_warnings():
    """Install a process-wide filter that ignores warnings.

    The filter is registered without a category or module restriction, so it
    applies to every warning category, not only ``UserWarning``.
    """
    from warnings import filterwarnings

    filterwarnings("ignore")
def main():
    """Set up metrics and HTTP routes, then run the analysis web service.

    The service exposes four POST endpoints (``/tables``, ``/redactions``,
    ``/figures``, ``/layout``) that each run one analysis function over the
    request body via ``annotate``, a GET ``/status`` health check, and the
    Prometheus metrics endpoint at ``/prometheus``.

    The server implementation is chosen by ``CONFIG.webserver.mode``:
    ``"development"`` uses Flask's built-in server with debugging enabled,
    ``"production"`` uses waitress. Any other value is reported and no
    server is started.
    """
    file_counter = Counter("cv_analysis_file_counter", "count processed files")
    # TODO: page counting is currently disabled.
    # page_counter = Counter("cv_analysis_page_counter", "count pages from processed files")
    ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")

    def start_monitoring():
        """Count the request and publish the peak traced memory in megabytes.

        The peak is sampled before the current request is processed.
        """
        file_counter.inc()
        _, peak = tracemalloc.get_traced_memory()
        ram_metric.set(peak / 10**6)

    logger.info(make_art())
    tracemalloc.start()

    app = Flask(__name__)
    metrics = PrometheusMetrics(app=app, path="/prometheus")

    @app.route("/tables", methods=["POST"])
    @metrics.summary("tables_request_time_seconds", "Time spent processing tables request")
    def get_tables():
        start_monitoring()
        return annotate(parse_table)

    @app.route("/redactions", methods=["POST"])
    @metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request")
    def get_redactions():
        start_monitoring()
        return annotate(find_redactions)

    @app.route("/figures", methods=["POST"])
    @metrics.summary("figures_request_time_seconds", "Time spent processing figures request")
    def get_figures():
        start_monitoring()
        return annotate(detect_figures)

    @app.route("/layout", methods=["POST"])
    @metrics.summary("layout_request_time_seconds", "Time spent processing layout request")
    def get_layout():
        start_monitoring()
        return annotate(parse_layout)

    @app.route("/status", methods=["GET"])
    def status():
        return jsonify("OK")

    logger.info("<3 Annotator ready.")

    mode = CONFIG.webserver.mode
    try:
        if mode == "development":
            app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True)
        elif mode == "production":
            # Log through the project logger and *before* serving: serve()
            # blocks until shutdown, so a message placed after it would only
            # appear once the server has already stopped.
            logger.info("Production.")
            serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port)
        else:
            # Previously an unrecognised mode made the process exit silently.
            logger.error(f"Unknown webserver mode {mode!r}; the server was not started.")
    finally:
        # Stop tracing even when the server exits through an exception.
        tracemalloc.stop()
def make_annotations(pdf, annotation_function):
    """Run ``annotation_function`` on every page and build the JSON response.

    Args:
        pdf: Iterable of pages. Each page must expose ``shape`` where index 0
            is used as the page height and index 1 as the page width
            (presumably numpy image arrays; verify against ``open_pdf``).
        annotation_function: Callable taking one page and returning a
            collection of ``(x, y, width, height)`` boxes; a falsy result
            yields an empty ``cells`` list for that page.

    Returns:
        The result of ``jsonify`` applied to the serialized
        ``{"pages": [...]}`` document.

    NOTE(review): the document is serialized with ``json.dumps`` and the
    resulting string is then handed to ``jsonify``, so the response body is a
    JSON string that itself contains JSON. Confirm that clients rely on this
    before changing it.
    """

    def describe_page(index, page):
        # Evaluate the detector first, then read the page dimensions.
        boxes = annotation_function(page)
        cells = (
            [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
            if boxes
            else []
        )
        return {
            "page": index,
            "pageWidth": page.shape[1],
            "pageHeight": page.shape[0],
            "cells": cells,
        }

    results = [describe_page(index, page) for index, page in enumerate(pdf)]

    logger.info(str(results))
    logger.info(type(results))

    serialized = json.dumps({"pages": results}, default=npconvert)
    return jsonify(serialized)
def get_size(data):
    """Return the size of ``data`` in megabytes, rounded to two decimals.

    The size comes from ``sys.getsizeof``, so it is shallow: objects that
    ``data`` merely references are not included. One megabyte is taken as
    10**6 bytes.
    """
    size_in_bytes = getsizeof(data)
    megabytes = size_in_bytes / 10**6
    return round(megabytes, 2)
def annotate(annotation_function):
    """Analyse the PDF in the current request body with ``annotation_function``.

    The raw request payload is opened with ``open_pdf`` and the resulting
    pages are passed to ``make_annotations``.

    Args:
        annotation_function: Per-page analysis callable forwarded to
            ``make_annotations``.

    Returns:
        Whatever ``make_annotations`` returns on success. If any exception is
        raised while reading or processing the request, it is logged and a
        response with status code 500 and the JSON body ``"Analysis failed"``
        is returned instead.
    """
    try:
        payload = request.data
        logger.info("Received data.")
        logger.info("Processing data.")
        document, _angles = open_pdf(payload)
        # NOTE: the deskew angles are currently not added to the result:
        # if CONFIG.deskew.function != "identity":
        #     annotations.update({"deskew_angles": angles})
        return make_annotations(document, annotation_function)
    except Exception as error:
        logger.warning("Analysis failed")
        logger.exception(error)
        failure = jsonify("Analysis failed")
        failure.status_code = 500
        return failure
def make_art():
    """Return the multi-line ASCII-art banner that is logged at startup."""
    # Raw string: the backslashes in the banner must not be read as escapes.
    return r"""
__ __
| \ | \
_______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______
/ \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \
| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$
| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \
| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\
\$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$
\$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$
| \__| $$
\$$ $$
\$$$$$$
"""
# Start the service only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()