fixed tests, passed (still need to extend tests)

This commit is contained in:
Isaac Riley 2022-04-27 10:52:35 +02:00
parent 41e5f55ea7
commit 81fe5139c2
3 changed files with 59 additions and 410 deletions

View File

@ -174,7 +174,7 @@ def parse_table(image: np.array, show=False):
stats = np.vstack(list(filter(is_large_enough, stats))) stats = np.vstack(list(filter(is_large_enough, stats)))
rects = stats[:, :-1][2:] rects = stats[:, :-1][2:]
return list(rects) return list(map(list, rects))
def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False): def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False):

View File

@ -1,406 +1,62 @@
{ {
"0": [ "0": [
[ [211, 447, 367, 47],
211, [581, 447, 417, 47],
415, [1001, 447, 406, 47],
367, [211, 497, 367, 47],
29 [580, 497, 418, 47],
], [1001, 497, 406, 47],
[ [211, 547, 367, 47],
581, [580, 547, 418, 47],
415, [1001, 547, 406, 47],
417, [211, 597, 367, 47],
29 [581, 597, 417, 47],
], [1001, 597, 406, 48],
[ [212, 647, 366, 48],
1001, [581, 647, 417, 48],
415, [1001, 647, 406, 48],
406, [581, 697, 417, 47],
29 [1001, 697, 407, 48],
], [212, 698, 366, 47],
[ [211, 747, 367, 48],
211, [581, 747, 417, 48],
447, [1001, 748, 407, 47],
367, [211, 798, 367, 47],
47 [581, 798, 417, 47],
], [1001, 798, 407, 47],
[ [212, 848, 366, 47],
581, [581, 848, 417, 47],
447, [1001, 848, 407, 48],
417, [212, 898, 366, 48],
47 [581, 898, 417, 48],
], [1001, 898, 407, 48],
[ [462, 1195, 368, 48],
1001, [833, 1195, 404, 48],
447, [462, 1245, 368, 48],
406, [833, 1245, 404, 47],
47 [462, 1296, 368, 47],
], [833, 1296, 404, 47],
[ [462, 1346, 368, 47],
211, [833, 1346, 404, 47],
497, [462, 1396, 368, 47],
367, [834, 1396, 403, 47],
47 [462, 1446, 368, 48],
], [833, 1446, 404, 48],
[ [462, 1496, 368, 48],
580, [833, 1496, 404, 48],
497, [462, 1547, 368, 47],
418, [834, 1547, 403, 47],
47 [462, 1597, 368, 48],
], [834, 1597, 403, 47],
[ [462, 1647, 368, 48],
1001, [833, 1647, 404, 48],
497, [462, 1698, 368, 47],
406, [833, 1698, 404, 47],
47 [462, 1748, 368, 47],
], [834, 1748, 403, 47],
[ [462, 1798, 368, 47],
211, [834, 1798, 403, 47],
547, [462, 1848, 368, 48],
367, [834, 1848, 403, 48]
47
],
[
580,
547,
418,
47
],
[
1001,
547,
406,
47
],
[
211,
597,
367,
47
],
[
581,
597,
417,
47
],
[
1001,
597,
406,
48
],
[
212,
647,
366,
48
],
[
581,
647,
417,
48
],
[
1001,
647,
406,
48
],
[
581,
697,
417,
47
],
[
1001,
697,
407,
48
],
[
212,
698,
366,
47
],
[
211,
747,
367,
48
],
[
581,
747,
417,
48
],
[
1001,
748,
407,
47
],
[
211,
798,
367,
47
],
[
581,
798,
417,
47
],
[
1001,
798,
407,
47
],
[
212,
848,
366,
47
],
[
581,
848,
417,
47
],
[
1001,
848,
407,
48
],
[
212,
898,
366,
48
],
[
581,
898,
417,
48
],
[
1001,
898,
407,
48
],
[
212,
949,
366,
33
],
[
581,
949,
827,
33
],
[
462,
1163,
368,
29
],
[
833,
1163,
404,
29
],
[
462,
1195,
368,
48
],
[
833,
1195,
404,
48
],
[
462,
1245,
368,
48
],
[
833,
1245,
404,
47
],
[
462,
1296,
368,
47
],
[
833,
1296,
404,
47
],
[
462,
1346,
368,
47
],
[
833,
1346,
404,
47
],
[
462,
1396,
368,
47
],
[
834,
1396,
403,
47
],
[
462,
1446,
368,
48
],
[
833,
1446,
404,
48
],
[
462,
1496,
368,
48
],
[
833,
1496,
404,
48
],
[
462,
1547,
368,
47
],
[
834,
1547,
403,
47
],
[
462,
1597,
368,
48
],
[
834,
1597,
403,
47
],
[
462,
1647,
368,
48
],
[
833,
1647,
404,
48
],
[
462,
1698,
368,
47
],
[
833,
1698,
404,
47
],
[
462,
1748,
368,
47
],
[
834,
1748,
403,
47
],
[
462,
1798,
368,
47
],
[
834,
1798,
403,
47
],
[
462,
1848,
368,
48
],
[
834,
1848,
403,
48
],
[
462,
1899,
369,
33
],
[
832,
1899,
405,
33
]
] ]
} }

View File

@ -25,7 +25,6 @@ def suppress_user_warnings():
def main(): def main():
file_counter = Counter("cv_analysis_file_counter", "count processed files") file_counter = Counter("cv_analysis_file_counter", "count processed files")
# page_counter = Counter("cv_analysis_page_counter", "count pages from processed files")
ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb") ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")
def start_monitoring(): def start_monitoring():
@ -44,7 +43,6 @@ def main():
def get_tables(): def get_tables():
start_monitoring() start_monitoring()
tables = annotate(parse_table) tables = annotate(parse_table)
# page_counter.inc(npages)
return tables return tables
@app.route("/redactions", methods=["POST"]) @app.route("/redactions", methods=["POST"])
@ -52,7 +50,6 @@ def main():
def get_redactions(): def get_redactions():
start_monitoring() start_monitoring()
redactions = annotate(find_redactions) redactions = annotate(find_redactions)
# page_counter.inc(npages)
return redactions return redactions
@app.route("/figures", methods=["POST"]) @app.route("/figures", methods=["POST"])
@ -60,7 +57,6 @@ def main():
def get_figures(): def get_figures():
start_monitoring() start_monitoring()
figures = annotate(detect_figures) figures = annotate(detect_figures)
# page_counter.inc(npages)
return figures return figures
@app.route("/layout", methods=["POST"]) @app.route("/layout", methods=["POST"])
@ -68,7 +64,6 @@ def main():
def get_layout(): def get_layout():
start_monitoring() start_monitoring()
layout = annotate(parse_layout) layout = annotate(parse_layout)
# page_counter.inc(npages)
return layout return layout
@app.route("/status", methods=["GET"]) @app.route("/status", methods=["GET"])
@ -93,8 +88,6 @@ def make_annotations(pdf, annotation_function):
boxes = annotation_function(page) boxes = annotation_function(page)
cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes] cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells}) results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells})
logger.info(str(results))
logger.info(type(results))
output_dict = {"pages": results} output_dict = {"pages": results}
return jsonify(json.dumps(output_dict, default=npconvert)) return jsonify(json.dumps(output_dict, default=npconvert))