156 lines
5.3 KiB
Python
156 lines
5.3 KiB
Python
import json
|
|
import tracemalloc
|
|
from sys import getsizeof
|
|
import logging
|
|
from flask import Flask, request, jsonify
|
|
from prometheus_client import Counter, Gauge
|
|
from prometheus_flask_exporter import PrometheusMetrics
|
|
from waitress import serve
|
|
|
|
from cv_analysis.utils import npconvert
|
|
from cv_analysis.table_parsing import parse_table
|
|
from cv_analysis.redaction_detection import find_redactions
|
|
from cv_analysis.layout_parsing import parse_layout
|
|
from cv_analysis.figure_detection import detect_figures
|
|
from cv_analysis.utils.logging import logger
|
|
from cv_analysis.utils.preprocessing import open_pdf
|
|
from cv_analysis.config import CONFIG
|
|
|
|
|
|
def suppress_user_warnings():
    """Install a global "ignore" filter so Python warnings are not shown.

    Despite the name, the filter is not restricted to ``UserWarning``: no
    category is passed, so the default (``Warning``, i.e. every category)
    applies.
    """
    # Imported locally, as in the original, so the module-level import list
    # is not affected by this helper.
    from warnings import filterwarnings

    filterwarnings("ignore")
|
|
|
|
|
|
def main():
    """Set up metrics and HTTP routes, then run the analysis web service.

    Registers a Prometheus counter and gauge, starts ``tracemalloc``, builds
    a Flask app with one POST route per analysis (``/tables``,
    ``/redactions``, ``/figures``, ``/layout``) plus a ``/status`` GET route,
    and finally starts the server selected by ``CONFIG.webserver.mode``:

    * ``"development"`` -- Flask's built-in server with ``debug=True``.
    * ``"production"`` -- waitress.

    Any other mode is reported through the logger and no server is started.
    ``tracemalloc`` is stopped when the server call returns or raises.
    """
    file_counter = Counter("cv_analysis_file_counter", "count processed files")
    # TODO: a per-page counter ("cv_analysis_page_counter") was sketched here
    # but never wired up; the handlers do not know the page count.
    ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")

    def start_monitoring():
        """Count one processed file and publish the traced peak memory."""
        file_counter.inc()
        _, peak = tracemalloc.get_traced_memory()
        # tracemalloc reports bytes; the gauge is expressed in 10**6 bytes.
        # This runs at the start of a handler, so the value is the peak
        # observed up to (not including) the current request.
        ram_metric.set(peak / 10**6)

    logger.info(make_art())
    tracemalloc.start()

    app = Flask(__name__)
    metrics = PrometheusMetrics(app=app, path="/prometheus")

    @app.route("/tables", methods=["POST"])
    @metrics.summary("tables_request_time_seconds", "Time spent processing tables request")
    def get_tables():
        start_monitoring()
        return annotate(parse_table)

    @app.route("/redactions", methods=["POST"])
    @metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request")
    def get_redactions():
        start_monitoring()
        return annotate(find_redactions)

    @app.route("/figures", methods=["POST"])
    @metrics.summary("figures_request_time_seconds", "Time spent processing figures request")
    def get_figures():
        start_monitoring()
        return annotate(detect_figures)

    @app.route("/layout", methods=["POST"])
    @metrics.summary("layout_request_time_seconds", "Time spent processing layout request")
    def get_layout():
        start_monitoring()
        return annotate(parse_layout)

    @app.route("/status", methods=["GET"])
    def status():
        return jsonify("OK")

    logger.info("<3 Annotator ready.")

    mode = CONFIG.webserver.mode
    host = CONFIG.webserver.host
    port = CONFIG.webserver.port
    try:
        if mode == "development":
            app.run(host=host, port=port, debug=True)
        elif mode == "production":
            # Log *before* serve(): it blocks until the server shuts down, so
            # a message placed after it would only appear at exit.
            logger.info("Production.")
            serve(app, host=host, port=port)
        else:
            # Previously this case fell through without any hint as to why
            # no server came up.
            logger.error(
                f"Unknown webserver mode {mode!r}; expected 'development' or "
                "'production'. No server started."
            )
    finally:
        # Stop tracing even when the server call raises.
        tracemalloc.stop()
|
|
|
|
|
|
|
|
def make_annotations(pdf, annotation_function):
    """Run ``annotation_function`` on every page and wrap the boxes as JSON.

    Args:
        pdf: Iterable of pages. Each page must expose ``shape`` with the
            height at index 0 and the width at index 1 (presumably numpy
            image arrays -- confirm against ``open_pdf``).
        annotation_function: Callable taking one page and returning an
            iterable of ``(x, y, w, h)`` boxes, or a falsy value when
            nothing was found.

    Returns:
        The result of ``jsonify`` applied to a JSON *string* of the form
        ``{"pages": [{"page", "pageWidth", "pageHeight", "cells"}, ...]}``.
        Because the payload is serialised with ``json.dumps`` before being
        passed to ``jsonify``, the response body is a JSON-encoded string
        that itself contains JSON.
    """
    pages = []
    for page_index, page_image in enumerate(pdf):
        detected = annotation_function(page_image)
        cells = (
            [
                {"x": left, "y": top, "width": width, "height": height}
                for left, top, width, height in detected
            ]
            if detected
            else []
        )
        page_entry = {
            "page": page_index,
            "pageWidth": page_image.shape[1],
            "pageHeight": page_image.shape[0],
            "cells": cells,
        }
        pages.append(page_entry)

    logger.info(str(pages))
    logger.info(type(pages))

    # npconvert is the fallback serialiser for values json cannot encode
    # natively.
    payload = json.dumps({"pages": pages}, default=npconvert)
    return jsonify(payload)
|
|
|
|
|
|
def get_size(data):
    """Return the shallow size of ``data`` in megabytes, rounded to 2 places.

    The size comes from ``sys.getsizeof``, which measures only the object
    itself and not the objects it references. One megabyte is taken as
    1,000,000 bytes.
    """
    bytes_per_megabyte = 1000000
    size_in_bytes = getsizeof(data)
    return round(size_in_bytes / bytes_per_megabyte, 2)
|
|
|
|
|
|
def annotate(annotation_function):
    """Analyse the PDF in the current request body with ``annotation_function``.

    Reads the raw body of the active Flask request, opens it with
    ``open_pdf`` and hands the pages to ``make_annotations``.

    Args:
        annotation_function: Per-page analysis callable forwarded to
            ``make_annotations``.

    Returns:
        Whatever ``make_annotations`` returns on success. If anything in the
        pipeline raises, the error is logged and a JSON response
        ``"Analysis failed"`` with status code 500 is returned instead.
    """
    try:
        payload = request.data
        logger.info("Received data.")
        logger.info("Processing data.")
        # open_pdf also yields per-page angles; they are currently unused.
        # NOTE: a disabled snippet used to attach them as "deskew_angles"
        # when CONFIG.deskew.function != "identity".
        pdf, _angles = open_pdf(payload)
        return make_annotations(pdf, annotation_function)
    except Exception as err:
        # Request boundary: report the failure to the client rather than
        # letting the exception propagate out of the handler.
        logger.warning("Analysis failed")
        logger.exception(err)
        failure = jsonify("Analysis failed")
        failure.status_code = 500
        return failure
|
|
|
|
|
|
# Return the multi-line ASCII-art banner that main() logs once at startup.
# The art is a raw string (r"""...""") so its backslashes are kept literally.
# NOTE(review): this view of the file has lost indentation and the banner's
# internal spacing, and carries stray "|" lines. The block is deliberately
# left untouched because the exact banner text cannot be reconstructed from
# here -- restore it from version control rather than editing it by hand.
def make_art():
|
|
art = r"""
|
|
__ __
|
|
| \ | \
|
|
_______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______
|
|
/ \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \
|
|
| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$
|
|
| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \
|
|
| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\
|
|
\$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$
|
|
\$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$
|
|
| \__| $$
|
|
\$$ $$
|
|
\$$$$$$
|
|
|
|
"""
|
|
return art
|
|
|
|
|
|
# Start the service only when this file is executed as a script; importing
# the module must not launch a server.
if __name__ == "__main__":
    main()
|