31 lines
922 B
Python

import json
from cv_analysis.server.pipeline import make_image_analysis_pipeline
from cv_analysis.table_inference import infer_lines
from cv_analysis.utils.annotate import annotate_pdf
# Analysis pipeline built once at module load from ``infer_lines``; further
# down it is invoked as ``pipe(data=...)`` and its result is iterated.
pipe = make_image_analysis_pipeline(infer_lines)
def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) ``argparse`` falls back to ``sys.argv[1:]``, which
            is what the script did before this parameter existed.

    Returns:
        An ``argparse.Namespace`` with the attributes ``pdf`` and
        ``vlp_output`` (both required positional paths) and ``output``
        (optional, defaults to ``/tmp/output.pdf``).
    """
    # Kept as a function-local import, as in the original.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("pdf", type=str, help="Path to the PDF file")
    parser.add_argument("vlp_output", type=str, help="Path to the VLP output JSON file")
    parser.add_argument("--output", type=str, help="Path to the output PDF file", default="/tmp/output.pdf")
    return parser.parse_args(argv)
# NOTE(review): argument parsing, file loading and the pipeline run all happen
# at module load; only the final annotation step is guarded by ``__main__``.
# That ordering is kept as-is so import-time behaviour does not change.
args = parse_args()

# Context managers make sure both input files are closed once they are read.
with open(args.pdf, "rb") as pdf_file:
    pdf_bytes = pdf_file.read()

# JSON is read as UTF-8 explicitly instead of relying on the locale default.
with open(args.vlp_output, "r", encoding="utf-8") as vlp_file:
    vlp_output = json.load(vlp_file)

# ``list`` fully consumes whatever iterable the pipeline returns.
best_result = list(pipe(data={"pdf": pdf_bytes, "vlp_output": vlp_output}))

if __name__ == "__main__":
    annotate_pdf(pdf_bytes, best_result, output_path=args.output)