Replace the hard-coded test paths in scripts/parse_pdf.py with command-line
arguments (pdf, vlp_output, --output) and draw the detected table lines in
colour with an explicit width in annotate_page.

Review changes relative to the submitted patch: the input files are read
inside `with` blocks so their handles are closed deterministically, and
parse_args() carries a docstring.

NOTE(review): this patch was recovered from a copy whose line breaks were
collapsed. The placement of blank context lines and the indentation in the
annotate.py hunk are reconstructed, and the post-image hash on the first
index line is the one from the submitted patch. Regenerate the patch from
the working tree if it does not apply cleanly.

diff --git a/scripts/parse_pdf.py b/scripts/parse_pdf.py
index d90a5f4..21f0467 100644
--- a/scripts/parse_pdf.py
+++ b/scripts/parse_pdf.py
@@ -6,13 +6,32 @@ from cv_analysis.utils.annotate import annotate_pdf
 
 pipe = make_image_analysis_pipeline(infer_lines)
 
-# FIXME: Implement argparsing
-pdf_bytes = open("test/test_data/article.pdf", "rb").read()
-vlp_output = json.load(open("test/test_data/article.json", "r"))
+def parse_args():
+    """Parse the command-line arguments of the script.
+
+    Returns:
+        argparse.Namespace with the attributes ``pdf``, ``vlp_output`` and ``output``.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("pdf", type=str, help="Path to the PDF file")
+    parser.add_argument("vlp_output", type=str, help="Path to the VLP output JSON file")
+    parser.add_argument("--output", type=str, help="Path to the output PDF file", default="/tmp/output.pdf")
+    return parser.parse_args()
+
+
+args = parse_args()
+
+# Context managers close both input files as soon as they have been read.
+with open(args.pdf, "rb") as pdf_file:
+    pdf_bytes = pdf_file.read()
+with open(args.vlp_output, "r") as vlp_file:
+    vlp_output = json.load(vlp_file)
 
 best_result = list(pipe(data={"pdf": pdf_bytes, "vlp_output": vlp_output}))
 
 # print(best_result)
 
 
-annotate_pdf(pdf_bytes, best_result, output_path = "/tmp/deine-mutter.pdf")
\ No newline at end of file
+annotate_pdf(pdf_bytes, best_result, output_path=args.output)
diff --git a/src/cv_analysis/utils/annotate.py b/src/cv_analysis/utils/annotate.py
index f02f865..8fab792 100644
--- a/src/cv_analysis/utils/annotate.py
+++ b/src/cv_analysis/utils/annotate.py
@@ -32,7 +32,7 @@ def annotate_page(page: fitz.Page, prediction):
     for line in prediction["tableLines"]:
         start = itemgetter("x1", "y1")(line)
         end = itemgetter("x2", "y2")(line)
-        page.draw_line(start, end)
+        page.draw_line(start, end, color=(1, 0, 0.5), width=1)
 
     return page
 