fixed json format and refactored service functions

This commit is contained in:
Isaac Riley 2022-04-22 11:22:16 +02:00
parent 88bb8dbddf
commit 9327fb7231
4 changed files with 33 additions and 40 deletions

View File

@@ -7,6 +7,7 @@ WORKDIR /app/service
COPY ./src ./src COPY ./src ./src
COPY cv_analysis ./cv_analysis COPY cv_analysis ./cv_analysis
COPY config.yaml ./config.yaml
RUN python3 -m pip install --upgrade pip RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install -e . RUN python3 -m pip install -e .

View File

@@ -23,5 +23,5 @@ deskew:
test_dummy: test_dummy test_dummy: test_dummy
visual_logging: visual_logging:
level: $LOGGING_LEVEL_ROOT|DEBUG level: $LOGGING_LEVEL_ROOT|INFO
output_folder: /tmp/debug/ output_folder: /tmp/debug/

View File

@@ -49,7 +49,7 @@ def main(args):
elif operation == "layout-parsing": elif operation == "layout-parsing":
response = requests.post("http://127.0.0.1:5000/layout", data=open(args.pdf_path, "rb")) response = requests.post("http://127.0.0.1:5000/layout", data=open(args.pdf_path, "rb"))
else: else:
raise ValueError("{args.operation} is not a valid value.") raise ValueError(f"{args.operation} is not a valid value.")
response.raise_for_status() response.raise_for_status()
predictions = response.json() predictions = response.json()

View File

@@ -8,11 +8,10 @@ from prometheus_flask_exporter import PrometheusMetrics
from waitress import serve from waitress import serve
from cv_analysis.utils import npconvert from cv_analysis.utils import npconvert
from cv_analysis.utils.preprocessing import preprocess_pdf_image # TODO from cv_analysis.table_parsing import parse_table
from cv_analysis.table_parsing import parse_table # , detect_tables_in_pdf from cv_analysis.redaction_detection import find_redactions
from cv_analysis.redaction_detection import find_redactions # , detect_redactions_in_pdf from cv_analysis.layout_parsing import parse_layout
from cv_analysis.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO from cv_analysis.figure_detection import detect_figures
from cv_analysis.figure_detection import detect_figures # , detect_figures_in_pdf #TODO
from cv_analysis.utils.logging import logger from cv_analysis.utils.logging import logger
from cv_analysis.utils.preprocessing import open_pdf from cv_analysis.utils.preprocessing import open_pdf
from cv_analysis.config import CONFIG from cv_analysis.config import CONFIG
@@ -44,7 +43,7 @@ def main():
@metrics.summary("tables_request_time_seconds", "Time spent processing tables request") @metrics.summary("tables_request_time_seconds", "Time spent processing tables request")
def get_tables(): def get_tables():
start_monitoring() start_monitoring()
tables = annotate("tables") tables = annotate(parse_table)
# page_counter.inc(npages) # page_counter.inc(npages)
return tables return tables
@@ -52,7 +51,7 @@ def main():
@metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request") @metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request")
def get_redactions(): def get_redactions():
start_monitoring() start_monitoring()
redactions = annotate("redactions") redactions = annotate(find_redactions)
# page_counter.inc(npages) # page_counter.inc(npages)
return redactions return redactions
@@ -60,7 +59,7 @@ def main():
@metrics.summary("figures_request_time_seconds", "Time spent processing figures request") @metrics.summary("figures_request_time_seconds", "Time spent processing figures request")
def get_figures(): def get_figures():
start_monitoring() start_monitoring()
figures = annotate("figures") figures = annotate(detect_figures)
# page_counter.inc(npages) # page_counter.inc(npages)
return figures return figures
@@ -68,7 +67,7 @@ def main():
@metrics.summary("layout_request_time_seconds", "Time spent processing layout request") @metrics.summary("layout_request_time_seconds", "Time spent processing layout request")
def get_layout(): def get_layout():
start_monitoring() start_monitoring()
layout = annotate("layout") layout = annotate(parse_layout)
# page_counter.inc(npages) # page_counter.inc(npages)
return layout return layout
@@ -77,7 +76,6 @@ def main():
response = "OK" response = "OK"
return jsonify(response) return jsonify(response)
# predictor = initialize_predictor()
logger.info("<3 Annotator ready.") logger.info("<3 Annotator ready.")
mode = CONFIG.webserver.mode mode = CONFIG.webserver.mode
@@ -89,46 +87,40 @@ def main():
tracemalloc.stop() tracemalloc.stop()
def apply_annotation_function(annotation_function, page_list):
outdict = {}
for i, page in enumerate(page_list):
results = annotation_function(page)
if results:
outdict.update({i: results})
return outdict
def make_annotations(pdf, annotation_function):
def make_annotations(pdf, task): results = []
if task == "tables": for i, page in enumerate(pdf):
annotation = {"tables": apply_annotation_function(parse_table, pdf)} boxes = annotation_function(page)
elif task == "redactions": cells= []
annotation = {"redactions": apply_annotation_function(find_redactions, pdf)} if boxes:
elif task == "figures": cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes]
annotation = {"figures": apply_annotation_function(detect_figures, pdf)} results.append({
elif task == "layout": "page": i,
annotation = {"layout": apply_annotation_function(parse_layout, pdf)} "pageWidth": page.shape[1],
else: "pageHeight": page.shape[0],
raise ValueError( "cells": cells
f"'{task}' is not a valid operation keyword. Valid values include: \ })
\ntables\nredactions\nfigures\nlayout\n" logger.info(str(results))
) logger.info(type(results))
output_dict = {"pages": results}
return json.dumps(annotation, default=npconvert) return jsonify(json.dumps(output_dict, default=npconvert))
def get_size(data): def get_size(data):
return round(getsizeof(data) / 1000000, 2) return round(getsizeof(data) / 1000000, 2)
def annotate(task): def annotate(annotation_function):
def inner(): def inner():
data = request.data data = request.data
logger.info(f"Received data.") logger.info(f"Received data.")
logger.info(f"Processing data.") logger.info(f"Processing data.")
pdf, angles = open_pdf(data) pdf, angles = open_pdf(data)
# npages = len(pdf) annotations = make_annotations(pdf, annotation_function)
annotations = make_annotations(pdf, task) #if CONFIG.deskew.function != "identity":
return jsonify({"result": annotations, "deskew_angles": angles}) # annotations.update({"deskew_angles": angles})
return annotations
try: try:
return inner() return inner()