"""HTTP service exposing the cv_analysis page annotators.

Routes (all defined in :func:`main`): POST ``/tables``, ``/redactions``,
``/figures`` and ``/layout`` run the matching annotator on the PDF sent in the
request body; GET ``/status`` is a liveness probe; Prometheus metrics are
served under ``/prometheus``.
"""

import json
import tracemalloc
from sys import getsizeof
import logging

from flask import Flask, request, jsonify
from prometheus_client import Counter, Gauge
from prometheus_flask_exporter import PrometheusMetrics
from waitress import serve

from cv_analysis.utils import npconvert
from cv_analysis.table_parsing import parse_table
from cv_analysis.redaction_detection import find_redactions
from cv_analysis.layout_parsing import parse_layout
from cv_analysis.figure_detection import detect_figures
from cv_analysis.utils.logging import logger
from cv_analysis.utils.preprocessing import open_pdf
from cv_analysis.config import CONFIG


def suppress_user_warnings():
    """Install a global filter that ignores every Python warning."""
    import warnings

    warnings.filterwarnings("ignore")


def main():
    """Build the Flask app, register the routes and run the web server.

    The server flavour is selected by ``CONFIG.webserver.mode``:
    ``"development"`` uses Flask's built-in server with ``debug=True``,
    ``"production"`` uses waitress.  Any other value starts no server and is
    reported with a warning.
    """
    file_counter = Counter("cv_analysis_file_counter", "count processed files")
    # TODO: page counting is currently disabled.
    # page_counter = Counter("cv_analysis_page_counter", "count pages from processed files")
    ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")

    def start_monitoring():
        """Count one processed file and publish the peak traced memory."""
        file_counter.inc()
        # get_traced_memory() returns (current, peak) in bytes.
        _, peak = tracemalloc.get_traced_memory()
        ram_metric.set(peak / 10**6)

    logger.info(make_art())

    tracemalloc.start()

    app = Flask(__name__)
    metrics = PrometheusMetrics(app=app, path="/prometheus")

    @app.route("/tables", methods=["POST"])
    @metrics.summary("tables_request_time_seconds", "Time spent processing tables request")
    def get_tables():
        start_monitoring()
        tables = annotate(parse_table)
        # page_counter.inc(npages)
        return tables

    @app.route("/redactions", methods=["POST"])
    @metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request")
    def get_redactions():
        start_monitoring()
        redactions = annotate(find_redactions)
        # page_counter.inc(npages)
        return redactions

    @app.route("/figures", methods=["POST"])
    @metrics.summary("figures_request_time_seconds", "Time spent processing figures request")
    def get_figures():
        start_monitoring()
        figures = annotate(detect_figures)
        # page_counter.inc(npages)
        return figures

    @app.route("/layout", methods=["POST"])
    @metrics.summary("layout_request_time_seconds", "Time spent processing layout request")
    def get_layout():
        start_monitoring()
        layout = annotate(parse_layout)
        # page_counter.inc(npages)
        return layout

    @app.route("/status", methods=["GET"])
    def status():
        response = "OK"
        return jsonify(response)

    logger.info("<3 Annotator ready.")

    mode = CONFIG.webserver.mode
    host = CONFIG.webserver.host
    port = CONFIG.webserver.port

    try:
        if mode == "development":
            app.run(host=host, port=port, debug=True)
        elif mode == "production":
            # serve() blocks until the server shuts down, so announce the
            # mode before entering it.
            logger.info("Production.")
            serve(app, host=host, port=port)
        else:
            logger.warning(f"Unknown webserver mode {mode!r}; no server started.")
    finally:
        # Stop tracing even when the server exits with an error.
        tracemalloc.stop()


def make_annotations(pdf, annotation_function):
    """Run ``annotation_function`` on every page and build the response.

    Args:
        pdf: Iterable of pages.  Each page must expose ``shape`` where
            ``shape[0]`` is reported as the page height and ``shape[1]`` as
            the page width.
        annotation_function: Callable taking one page and returning an
            iterable of ``(x, y, width, height)`` boxes (or a falsy value
            when nothing was found).

    Returns:
        A Flask response produced by ``jsonify``.
    """
    results = []
    for page_index, page in enumerate(pdf):
        boxes = annotation_function(page)
        if boxes:
            cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
        else:
            cells = []
        results.append(
            {
                "page": page_index,
                "pageWidth": page.shape[1],
                "pageHeight": page.shape[0],
                "cells": cells,
            }
        )

    logger.info(str(results))
    logger.info(type(results))

    output_dict = {"pages": results}
    # NOTE(review): the payload is JSON-encoded twice (json.dumps, then
    # jsonify), so clients receive a JSON string that itself contains JSON.
    # Kept as is for compatibility -- confirm whether this is intended.
    return jsonify(json.dumps(output_dict, default=npconvert))


def get_size(data):
    """Return ``sys.getsizeof(data)`` in units of 10**6 bytes, rounded to 2 places.

    ``getsizeof`` is shallow: objects referenced by ``data`` are not counted.
    """
    return round(getsizeof(data) / 1000000, 2)


def annotate(annotation_function):
    """Annotate the PDF in the current request body.

    Must be called inside a Flask request context.  Any exception raised
    while opening or analysing the document is logged and converted into a
    response with status code 500 and the body ``"Analysis failed"``.

    Args:
        annotation_function: Per-page annotator passed on to
            :func:`make_annotations`.

    Returns:
        The response from :func:`make_annotations`, or the 500 response.
    """
    try:
        data = request.data
        logger.info("Received data.")
        logger.info("Processing data.")
        pdf, _angles = open_pdf(data)
        annotations = make_annotations(pdf, annotation_function)
        # TODO: deskew angles are currently not reported.
        # if CONFIG.deskew.function != "identity":
        #     annotations.update({"deskew_angles": _angles})
        return annotations
    except Exception as err:
        logger.warning("Analysis failed")
        logger.exception(err)
        resp = jsonify("Analysis failed")
        resp.status_code = 500
        return resp


def make_art():
    """Return the start-up banner that :func:`main` logs."""
    # NOTE(review): the banner literal is reproduced verbatim from the file as
    # received; its whitespace looks collapsed, so the ASCII art probably no
    # longer renders -- restore it from version control if so.
    art = r""" __ __ | \ | \ _______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______ / \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \ | $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$ | $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \ | $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ 
$$ _\$$$$$$\| $$ _\$$$$$$\ \$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$ \$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$ | \__| $$ \$$ $$ \$$$$$$ """
    return art


if __name__ == "__main__":
    main()