"""Run the line-inference image analysis pipeline on a PDF and annotate it.

The script reads a PDF and a VLP output JSON file, feeds both to a pipeline
built from ``infer_lines``, and passes the collected results to
``annotate_pdf`` together with the requested output path.

Usage::

    python <this_script> PDF VLP_OUTPUT [--output OUTPUT]
"""
import argparse
import json

from cv_analysis.server.pipeline import make_image_analysis_pipeline
from cv_analysis.table_inference import infer_lines
from cv_analysis.utils.annotate import annotate_pdf

# Pipeline callable built around the line-inference function.
pipe = make_image_analysis_pipeline(infer_lines)


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of this script.

    Returns:
        A namespace with the attributes ``pdf`` (path to the input PDF),
        ``vlp_output`` (path to the VLP output JSON file) and ``output``
        (path of the output PDF, ``/tmp/output.pdf`` by default).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pdf", type=str, help="Path to the PDF file")
    parser.add_argument("vlp_output", type=str, help="Path to the VLP output JSON file")
    parser.add_argument("--output", type=str, help="Path to the output PDF file", default="/tmp/output.pdf")
    return parser.parse_args()


# NOTE(review): argument parsing, file loading and the pipeline run all happen
# at module level, i.e. also when this file is imported; only the annotation
# step is guarded by ``__main__``. Kept as-is to preserve existing behaviour.
args = parse_args()

# Context managers make sure both input files are closed once they are read.
with open(args.pdf, "rb") as pdf_file:
    pdf_bytes = pdf_file.read()

# JSON is read as UTF-8 explicitly rather than with the locale default.
with open(args.vlp_output, "r", encoding="utf-8") as vlp_file:
    vlp_output = json.load(vlp_file)

# Materialise the pipeline's iterable result into a list.
best_result = list(pipe(data={"pdf": pdf_bytes, "vlp_output": vlp_output}))

if __name__ == "__main__":
    # Presumably writes the annotated PDF to ``args.output`` -- confirm
    # against ``annotate_pdf``.
    annotate_pdf(pdf_bytes, best_result, output_path=args.output)