diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py index 0a6ceed..852df2b 100644 --- a/cv_analysis/table_parsing.py +++ b/cv_analysis/table_parsing.py @@ -174,7 +174,7 @@ def parse_table(image: np.array, show=False): stats = np.vstack(list(filter(is_large_enough, stats))) rects = stats[:, :-1][2:] - return list(rects) + return list(map(list, rects)) def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False): diff --git a/cv_analysis/test/test_data/table.json b/cv_analysis/test/test_data/table.json index 009b24e..5e78d0e 100644 --- a/cv_analysis/test/test_data/table.json +++ b/cv_analysis/test/test_data/table.json @@ -1,406 +1,62 @@ { "0": [ - [ - 211, - 415, - 367, - 29 - ], - [ - 581, - 415, - 417, - 29 - ], - [ - 1001, - 415, - 406, - 29 - ], - [ - 211, - 447, - 367, - 47 - ], - [ - 581, - 447, - 417, - 47 - ], - [ - 1001, - 447, - 406, - 47 - ], - [ - 211, - 497, - 367, - 47 - ], - [ - 580, - 497, - 418, - 47 - ], - [ - 1001, - 497, - 406, - 47 - ], - [ - 211, - 547, - 367, - 47 - ], - [ - 580, - 547, - 418, - 47 - ], - [ - 1001, - 547, - 406, - 47 - ], - [ - 211, - 597, - 367, - 47 - ], - [ - 581, - 597, - 417, - 47 - ], - [ - 1001, - 597, - 406, - 48 - ], - [ - 212, - 647, - 366, - 48 - ], - [ - 581, - 647, - 417, - 48 - ], - [ - 1001, - 647, - 406, - 48 - ], - [ - 581, - 697, - 417, - 47 - ], - [ - 1001, - 697, - 407, - 48 - ], - [ - 212, - 698, - 366, - 47 - ], - [ - 211, - 747, - 367, - 48 - ], - [ - 581, - 747, - 417, - 48 - ], - [ - 1001, - 748, - 407, - 47 - ], - [ - 211, - 798, - 367, - 47 - ], - [ - 581, - 798, - 417, - 47 - ], - [ - 1001, - 798, - 407, - 47 - ], - [ - 212, - 848, - 366, - 47 - ], - [ - 581, - 848, - 417, - 47 - ], - [ - 1001, - 848, - 407, - 48 - ], - [ - 212, - 898, - 366, - 48 - ], - [ - 581, - 898, - 417, - 48 - ], - [ - 1001, - 898, - 407, - 48 - ], - [ - 212, - 949, - 366, - 33 - ], - [ - 581, - 949, - 827, - 33 - ], - [ - 462, - 1163, - 368, - 29 - ], - [ - 833, - 1163, - 404, - 29 - ], - [ - 462, - 1195, - 368, - 48 - ], - [ - 833, - 1195, - 404, - 48 - ], - [ - 462, - 1245, - 368, - 48 - ], - [ - 833, - 1245, - 404, - 47 - ], - [ - 462, - 1296, - 368, - 47 - ], - [ - 833, - 1296, - 404, - 47 - ], - [ - 462, - 1346, - 368, - 47 - ], - [ - 833, - 1346, - 404, - 47 - ], - [ - 462, - 1396, - 368, - 47 - ], - [ - 834, - 1396, - 403, - 47 - ], - [ - 462, - 1446, - 368, - 48 - ], - [ - 833, - 1446, - 404, - 48 - ], - [ - 462, - 1496, - 368, - 48 - ], - [ - 833, - 1496, - 404, - 48 - ], - [ - 462, - 1547, - 368, - 47 - ], - [ - 834, - 1547, - 403, - 47 - ], - [ - 462, - 1597, - 368, - 48 - ], - [ - 834, - 1597, - 403, - 47 - ], - [ - 462, - 1647, - 368, - 48 - ], - [ - 833, - 1647, - 404, - 48 - ], - [ - 462, - 1698, - 368, - 47 - ], - [ - 833, - 1698, - 404, - 47 - ], - [ - 462, - 1748, - 368, - 47 - ], - [ - 834, - 1748, - 403, - 47 - ], - [ - 462, - 1798, - 368, - 47 - ], - [ - 834, - 1798, - 403, - 47 - ], - [ - 462, - 1848, - 368, - 48 - ], - [ - 834, - 1848, - 403, - 48 - ], - [ - 462, - 1899, - 369, - 33 - ], - [ - 832, - 1899, - 405, - 33 - ] + [211, 447, 367, 47], + [581, 447, 417, 47], + [1001, 447, 406, 47], + [211, 497, 367, 47], + [580, 497, 418, 47], + [1001, 497, 406, 47], + [211, 547, 367, 47], + [580, 547, 418, 47], + [1001, 547, 406, 47], + [211, 597, 367, 47], + [581, 597, 417, 47], + [1001, 597, 406, 48], + [212, 647, 366, 48], + [581, 647, 417, 48], + [1001, 647, 406, 48], + [581, 697, 417, 47], + [1001, 697, 407, 48], + [212, 698, 366, 47], + [211, 747, 367, 48], + [581, 747, 417, 48], + [1001, 748, 407, 47], + [211, 798, 367, 47], + [581, 798, 417, 47], + [1001, 798, 407, 47], + [212, 848, 366, 47], + [581, 848, 417, 47], + [1001, 848, 407, 48], + [212, 898, 366, 48], + [581, 898, 417, 48], + [1001, 898, 407, 48], + [462, 1195, 368, 48], + [833, 1195, 404, 48], + [462, 1245, 368, 48], + [833, 1245, 404, 47], + [462, 1296, 368, 47], + [833, 1296, 404, 47], + [462, 1346, 368, 47], + [833, 1346, 404, 47], + [462, 1396, 368, 47], + [834, 1396, 403, 47], + [462, 1446, 368, 48], + [833, 1446, 404, 48], + [462, 1496, 368, 48], + [833, 1496, 404, 48], + [462, 1547, 368, 47], + [834, 1547, 403, 47], + [462, 1597, 368, 48], + [834, 1597, 403, 47], + [462, 1647, 368, 48], + [833, 1647, 404, 48], + [462, 1698, 368, 47], + [833, 1698, 404, 47], + [462, 1748, 368, 47], + [834, 1748, 403, 47], + [462, 1798, 368, 47], + [834, 1798, 403, 47], + [462, 1848, 368, 48], + [834, 1848, 403, 48] ] } \ No newline at end of file diff --git a/src/run_service.py b/src/run_service.py index 876b96e..86454eb 100644 --- a/src/run_service.py +++ b/src/run_service.py @@ -25,7 +25,6 @@ def suppress_user_warnings(): def main(): file_counter = Counter("cv_analysis_file_counter", "count processed files") - # page_counter = Counter("cv_analysis_page_counter", "count pages from processed files") ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb") def start_monitoring(): @@ -44,7 +43,6 @@ def main(): def get_tables(): start_monitoring() tables = annotate(parse_table) - # page_counter.inc(npages) return tables @app.route("/redactions", methods=["POST"]) @@ -52,7 +50,6 @@ def main(): def get_redactions(): start_monitoring() redactions = annotate(find_redactions) - # page_counter.inc(npages) return redactions @app.route("/figures", methods=["POST"]) @@ -60,7 +57,6 @@ def main(): def get_figures(): start_monitoring() figures = annotate(detect_figures) - # page_counter.inc(npages) return figures @app.route("/layout", methods=["POST"]) @@ -68,7 +64,6 @@ def main(): def get_layout(): start_monitoring() layout = annotate(parse_layout) - # page_counter.inc(npages) return layout @app.route("/status", methods=["GET"]) @@ -93,8 +88,6 @@ def make_annotations(pdf, annotation_function): boxes = annotation_function(page) cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes] results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells}) - logger.info(str(results)) - logger.info(type(results)) output_dict = {"pages": results} return jsonify(json.dumps(output_dict, default=npconvert))