fixed tests, passed (still need to extend tests)

This commit is contained in:
Isaac Riley 2022-04-27 10:52:35 +02:00
parent 41e5f55ea7
commit 81fe5139c2
3 changed files with 59 additions and 410 deletions

View File

@ -174,7 +174,7 @@ def parse_table(image: np.array, show=False):
stats = np.vstack(list(filter(is_large_enough, stats)))
rects = stats[:, :-1][2:]
return list(rects)
return list(map(list, rects))
def annotate_tables_in_pdf(pdf_path, page_index=0, deskew=False, show=False):

View File

@ -1,406 +1,62 @@
{
"0": [
[
211,
415,
367,
29
],
[
581,
415,
417,
29
],
[
1001,
415,
406,
29
],
[
211,
447,
367,
47
],
[
581,
447,
417,
47
],
[
1001,
447,
406,
47
],
[
211,
497,
367,
47
],
[
580,
497,
418,
47
],
[
1001,
497,
406,
47
],
[
211,
547,
367,
47
],
[
580,
547,
418,
47
],
[
1001,
547,
406,
47
],
[
211,
597,
367,
47
],
[
581,
597,
417,
47
],
[
1001,
597,
406,
48
],
[
212,
647,
366,
48
],
[
581,
647,
417,
48
],
[
1001,
647,
406,
48
],
[
581,
697,
417,
47
],
[
1001,
697,
407,
48
],
[
212,
698,
366,
47
],
[
211,
747,
367,
48
],
[
581,
747,
417,
48
],
[
1001,
748,
407,
47
],
[
211,
798,
367,
47
],
[
581,
798,
417,
47
],
[
1001,
798,
407,
47
],
[
212,
848,
366,
47
],
[
581,
848,
417,
47
],
[
1001,
848,
407,
48
],
[
212,
898,
366,
48
],
[
581,
898,
417,
48
],
[
1001,
898,
407,
48
],
[
212,
949,
366,
33
],
[
581,
949,
827,
33
],
[
462,
1163,
368,
29
],
[
833,
1163,
404,
29
],
[
462,
1195,
368,
48
],
[
833,
1195,
404,
48
],
[
462,
1245,
368,
48
],
[
833,
1245,
404,
47
],
[
462,
1296,
368,
47
],
[
833,
1296,
404,
47
],
[
462,
1346,
368,
47
],
[
833,
1346,
404,
47
],
[
462,
1396,
368,
47
],
[
834,
1396,
403,
47
],
[
462,
1446,
368,
48
],
[
833,
1446,
404,
48
],
[
462,
1496,
368,
48
],
[
833,
1496,
404,
48
],
[
462,
1547,
368,
47
],
[
834,
1547,
403,
47
],
[
462,
1597,
368,
48
],
[
834,
1597,
403,
47
],
[
462,
1647,
368,
48
],
[
833,
1647,
404,
48
],
[
462,
1698,
368,
47
],
[
833,
1698,
404,
47
],
[
462,
1748,
368,
47
],
[
834,
1748,
403,
47
],
[
462,
1798,
368,
47
],
[
834,
1798,
403,
47
],
[
462,
1848,
368,
48
],
[
834,
1848,
403,
48
],
[
462,
1899,
369,
33
],
[
832,
1899,
405,
33
]
[211, 447, 367, 47],
[581, 447, 417, 47],
[1001, 447, 406, 47],
[211, 497, 367, 47],
[580, 497, 418, 47],
[1001, 497, 406, 47],
[211, 547, 367, 47],
[580, 547, 418, 47],
[1001, 547, 406, 47],
[211, 597, 367, 47],
[581, 597, 417, 47],
[1001, 597, 406, 48],
[212, 647, 366, 48],
[581, 647, 417, 48],
[1001, 647, 406, 48],
[581, 697, 417, 47],
[1001, 697, 407, 48],
[212, 698, 366, 47],
[211, 747, 367, 48],
[581, 747, 417, 48],
[1001, 748, 407, 47],
[211, 798, 367, 47],
[581, 798, 417, 47],
[1001, 798, 407, 47],
[212, 848, 366, 47],
[581, 848, 417, 47],
[1001, 848, 407, 48],
[212, 898, 366, 48],
[581, 898, 417, 48],
[1001, 898, 407, 48],
[462, 1195, 368, 48],
[833, 1195, 404, 48],
[462, 1245, 368, 48],
[833, 1245, 404, 47],
[462, 1296, 368, 47],
[833, 1296, 404, 47],
[462, 1346, 368, 47],
[833, 1346, 404, 47],
[462, 1396, 368, 47],
[834, 1396, 403, 47],
[462, 1446, 368, 48],
[833, 1446, 404, 48],
[462, 1496, 368, 48],
[833, 1496, 404, 48],
[462, 1547, 368, 47],
[834, 1547, 403, 47],
[462, 1597, 368, 48],
[834, 1597, 403, 47],
[462, 1647, 368, 48],
[833, 1647, 404, 48],
[462, 1698, 368, 47],
[833, 1698, 404, 47],
[462, 1748, 368, 47],
[834, 1748, 403, 47],
[462, 1798, 368, 47],
[834, 1798, 403, 47],
[462, 1848, 368, 48],
[834, 1848, 403, 48]
]
}

View File

@ -25,7 +25,6 @@ def suppress_user_warnings():
def main():
file_counter = Counter("cv_analysis_file_counter", "count processed files")
# page_counter = Counter("cv_analysis_page_counter", "count pages from processed files")
ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")
def start_monitoring():
@ -44,7 +43,6 @@ def main():
def get_tables():
start_monitoring()
tables = annotate(parse_table)
# page_counter.inc(npages)
return tables
@app.route("/redactions", methods=["POST"])
@ -52,7 +50,6 @@ def main():
def get_redactions():
start_monitoring()
redactions = annotate(find_redactions)
# page_counter.inc(npages)
return redactions
@app.route("/figures", methods=["POST"])
@ -60,7 +57,6 @@ def main():
def get_figures():
start_monitoring()
figures = annotate(detect_figures)
# page_counter.inc(npages)
return figures
@app.route("/layout", methods=["POST"])
@ -68,7 +64,6 @@ def main():
def get_layout():
start_monitoring()
layout = annotate(parse_layout)
# page_counter.inc(npages)
return layout
@app.route("/status", methods=["GET"])
@ -93,8 +88,6 @@ def make_annotations(pdf, annotation_function):
boxes = annotation_function(page)
cells = [{"x": x, "y": y, "width": w, "height": h} for x, y, w, h in boxes]
results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells})
logger.info(str(results))
logger.info(type(results))
output_dict = {"pages": results}
return jsonify(json.dumps(output_dict, default=npconvert))