From 9327fb7231a7097b2e57d2cefd82fe4c8b1ffeeb Mon Sep 17 00:00:00 2001 From: Isaac Riley Date: Fri, 22 Apr 2022 11:22:16 +0200 Subject: [PATCH] fixed json format and refactored service functions --- Dockerfile | 1 + config.yaml | 2 +- scripts/client_mock.py | 2 +- src/run_service.py | 68 +++++++++++++++++++----------------------- 4 files changed, 33 insertions(+), 40 deletions(-) diff --git a/Dockerfile b/Dockerfile index 054c5d9..19f3b04 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,7 @@ WORKDIR /app/service COPY ./src ./src COPY cv_analysis ./cv_analysis +COPY config.yaml ./config.yaml RUN python3 -m pip install --upgrade pip RUN python3 -m pip install -e . diff --git a/config.yaml b/config.yaml index fc6bb42..42bd2e7 100644 --- a/config.yaml +++ b/config.yaml @@ -23,5 +23,5 @@ deskew: test_dummy: test_dummy visual_logging: - level: $LOGGING_LEVEL_ROOT|DEBUG + level: $LOGGING_LEVEL_ROOT|INFO output_folder: /tmp/debug/ \ No newline at end of file diff --git a/scripts/client_mock.py b/scripts/client_mock.py index ffdd0ab..96ab9b4 100644 --- a/scripts/client_mock.py +++ b/scripts/client_mock.py @@ -49,7 +49,7 @@ def main(args): elif operation == "layout-parsing": response = requests.post("http://127.0.0.1:5000/layout", data=open(args.pdf_path, "rb")) else: - raise ValueError("{args.operation} is not a valid value.") + raise ValueError(f"{args.operation} is not a valid value.") response.raise_for_status() predictions = response.json() diff --git a/src/run_service.py b/src/run_service.py index 269c2f4..304e8d8 100644 --- a/src/run_service.py +++ b/src/run_service.py @@ -8,11 +8,10 @@ from prometheus_flask_exporter import PrometheusMetrics from waitress import serve from cv_analysis.utils import npconvert -from cv_analysis.utils.preprocessing import preprocess_pdf_image # TODO -from cv_analysis.table_parsing import parse_table # , detect_tables_in_pdf -from cv_analysis.redaction_detection import find_redactions # , detect_redactions_in_pdf -from cv_analysis.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO -from cv_analysis.figure_detection import detect_figures # , detect_figures_in_pdf #TODO +from cv_analysis.table_parsing import parse_table +from cv_analysis.redaction_detection import find_redactions +from cv_analysis.layout_parsing import parse_layout +from cv_analysis.figure_detection import detect_figures from cv_analysis.utils.logging import logger from cv_analysis.utils.preprocessing import open_pdf from cv_analysis.config import CONFIG @@ -44,7 +43,7 @@ def main(): @metrics.summary("tables_request_time_seconds", "Time spent processing tables request") def get_tables(): start_monitoring() - tables = annotate("tables") + tables = annotate(parse_table) # page_counter.inc(npages) return tables @@ -52,7 +51,7 @@ def main(): @metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request") def get_redactions(): start_monitoring() - redactions = annotate("redactions") + redactions = annotate(find_redactions) # page_counter.inc(npages) return redactions @@ -60,7 +59,7 @@ def main(): @metrics.summary("figures_request_time_seconds", "Time spent processing figures request") def get_figures(): start_monitoring() - figures = annotate("figures") + figures = annotate(detect_figures) # page_counter.inc(npages) return figures @@ -68,7 +67,7 @@ def main(): @metrics.summary("layout_request_time_seconds", "Time spent processing layout request") def get_layout(): start_monitoring() - layout = annotate("layout") + layout = annotate(parse_layout) # page_counter.inc(npages) return layout @@ -77,7 +76,6 @@ def main(): response = "OK" return jsonify(response) - # predictor = initialize_predictor() logger.info("<3 Annotator ready.") mode = CONFIG.webserver.mode @@ -89,46 +87,40 @@ def main(): tracemalloc.stop() -def apply_annotation_function(annotation_function, page_list): - outdict = {} - for i, page in enumerate(page_list): - results = annotation_function(page) - if results: - outdict.update({i: results}) - return outdict - -def make_annotations(pdf, task): - if task == "tables": - annotation = {"tables": apply_annotation_function(parse_table, pdf)} - elif task == "redactions": - annotation = {"redactions": apply_annotation_function(find_redactions, pdf)} - elif task == "figures": - annotation = {"figures": apply_annotation_function(detect_figures, pdf)} - elif task == "layout": - annotation = {"layout": apply_annotation_function(parse_layout, pdf)} - else: - raise ValueError( - f"'{task}' is not a valid operation keyword. Valid values include: \ - \ntables\nredactions\nfigures\nlayout\n" - ) - - return json.dumps(annotation, default=npconvert) +def make_annotations(pdf, annotation_function): + results = [] + for i, page in enumerate(pdf): + boxes = annotation_function(page) + cells= [] + if boxes: + cells = [{"x": x, "y": y, "width": w, "height": h} for x,y,w,h in boxes] + results.append({ + "page": i, + "pageWidth": page.shape[1], + "pageHeight": page.shape[0], + "cells": cells + }) + logger.info(str(results)) + logger.info(type(results)) + output_dict = {"pages": results} + return jsonify(json.dumps(output_dict, default=npconvert)) def get_size(data): return round(getsizeof(data) / 1000000, 2) -def annotate(task): +def annotate(annotation_function): def inner(): data = request.data logger.info(f"Received data.") logger.info(f"Processing data.") pdf, angles = open_pdf(data) - # npages = len(pdf) - annotations = make_annotations(pdf, task) - return jsonify({"result": annotations, "deskew_angles": angles}) + annotations = make_annotations(pdf, annotation_function) + #if CONFIG.deskew.function != "identity": + # annotations.update({"deskew_angles": angles}) + return annotations try: return inner()