"""Benchmark the cv-analysis pipelines on a folder of PDFs.

For every PDF in the input folder each analysis operation is executed
``n_runs`` times; the mean and standard deviation of the runtime are
normalised to milliseconds per page and rendered as a grouped bar chart.
"""

import argparse
import time
from functools import partial
from pathlib import Path

import fitz
import numpy as np
from funcy import lmap
from matplotlib import pyplot as plt

from cv_analysis.server.pipeline import get_analysis_fn, make_analysis_pipeline

# Operations benchmarked when the caller does not supply its own list.
DEFAULT_TEST_CASES = ("table", "layout", "figure")


def parse_args():
    """Parse the command-line arguments of the benchmark script.

    Returns:
        argparse.Namespace with ``pdf_folder``, ``output_folder`` and
        ``n_runs`` (already converted to ``int``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pdf_folder", help="Path to folder with PDFs to evaluate")
    parser.add_argument("output_folder", help="Path to folder where the Runtime plot should be stored")
    parser.add_argument("n_runs", type=int, help="Number of runs per test")
    return parser.parse_args()


def measure(fn, n_runs):
    """Wrap ``fn`` so that calling it returns runtime statistics.

    Args:
        fn: Callable to benchmark. Its return value must be iterable; it is
            fully consumed so that lazily evaluated results are included in
            the measured time.
        n_runs: Number of timed repetitions per call; must be at least 1.

    Returns:
        A function that forwards its arguments to ``fn`` ``n_runs`` times and
        returns ``(mean, std)`` of the wall-clock runtimes in seconds.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1 (the statistics would
            otherwise silently be NaN).
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be at least 1, got {n_runs}")

    def run(*args, **kwargs):
        def _run():
            # perf_counter is monotonic and has a higher resolution than
            # time.time, so clock adjustments cannot skew the measurement.
            start = time.perf_counter()
            list(fn(*args, **kwargs))  # Evaluate generators
            return time.perf_counter() - start

        runtimes = [_run() for _ in range(n_runs)]
        return np.mean(runtimes), np.std(runtimes)

    return run


def run_tests(pdf, test_cases, n_runs):
    """Time every analysis operation in ``test_cases`` on one PDF.

    Args:
        pdf: The PDF content that is passed to each analysis pipeline.
        test_cases: Operation names understood by ``get_analysis_fn``.
        n_runs: Number of timed repetitions per operation.

    Returns:
        A list with one ``(mean, std)`` runtime tuple (seconds) per test case,
        in the order of ``test_cases``.
    """

    def measure_analysis_pipe(test_case):
        timed_analysis_pipe = measure(make_analysis_pipeline(get_analysis_fn(test_case)), n_runs)
        return timed_analysis_pipe(pdf)

    return lmap(measure_analysis_pipe, test_cases)


def to_ms_per_page(runtime, page_count):
    """Convert a runtime in seconds into whole milliseconds per page."""
    ms_per_page = runtime / page_count * 1000
    return round(ms_per_page, 0)


def measure_pdf(pdf_path, n_runs, test_cases=DEFAULT_TEST_CASES):
    """Benchmark all test cases on a single PDF file.

    Args:
        pdf_path: Path of the PDF file to read.
        n_runs: Number of timed repetitions per test case.
        test_cases: Operation names to benchmark. Passed explicitly instead
            of being read from a module-level global, so the function also
            works when this module is imported rather than run as a script.

    Returns:
        ``(means, std)``: two lists with the mean and the standard deviation
        of the runtime in ms/page, one entry per test case.
    """
    with open(pdf_path, "rb") as f:
        pdf = f.read()

    # Only the page count is needed; close the document right away instead
    # of leaking the handle.
    with fitz.open(stream=pdf) as document:
        page_count = document.page_count

    format_fn = partial(to_ms_per_page, page_count=page_count)

    means, std = zip(*run_tests(pdf, test_cases, n_runs=n_runs))
    return lmap(format_fn, means), lmap(format_fn, std)


def plot_results_and_save(results, labels, n_runs, test_pdf_paths, output_folder="."):
    """Draw the grouped runtime bar chart and write it as a PNG file.

    Args:
        results: One ``(means, std)`` pair per PDF, as returned by
            ``measure_pdf``.
        labels: Names of the benchmarked operations (x-axis tick labels).
        n_runs: Number of runs per test; shown in the title and file name.
        test_pdf_paths: Paths of the measured PDFs, aligned with ``results``;
            the file stems are used as legend entries.
        output_folder: Directory the plot is written to (created if missing).
            Defaults to the current working directory.
    """
    results = list(results)
    fig, ax = plt.subplots()

    # 0.2 is the regular bar width; shrink it when there are so many PDFs
    # that the bars of one operation would spill into the next group.
    width = min(0.2, 0.8 / max(len(results), 1))
    x_labels = np.arange(len(labels))
    plt.xticks(ticks=x_labels, labels=labels, rotation=90)
    plt.grid(linestyle="dotted")

    for idx, ((means, std), test_pdf_path) in enumerate(zip(results, test_pdf_paths)):
        # Shift each PDF's bars sideways so they sit next to each other.
        x = x_labels + idx * width
        bars = ax.bar(x, means, width, yerr=std, label=Path(test_pdf_path).stem)
        ax.bar_label(bars)

    ax.set_ylabel("ms/page")
    ax.set_xlabel("Cv-analysis operation")
    ax.set_title(f"Cv-analysis runtime estimation {n_runs=}")
    if results:
        # Without any bars there are no handles and legend() only warns.
        ax.legend(loc=0)

    output_dir = Path(output_folder)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"cv_analysis_runtime_{n_runs=}.png"
    fig.savefig(output_path, dpi=200, bbox_inches="tight", pad_inches=0.5)
    plt.close(fig)


def measure_and_save_plot(args, test_cases):
    """Benchmark every PDF in ``args.pdf_folder`` and save the runtime plot.

    Args:
        args: Namespace providing ``pdf_folder``, ``output_folder`` and
            ``n_runs`` (an int or a string that converts to one).
        test_cases: Operation names to benchmark.

    Raises:
        FileNotFoundError: If ``args.pdf_folder`` contains no ``*.pdf`` file.
    """
    n_runs = int(args.n_runs)

    # Sorted so that the legend and bar order are reproducible across runs.
    test_pdf_paths = sorted(Path(args.pdf_folder).glob("*.pdf"))
    if not test_pdf_paths:
        raise FileNotFoundError(f"No PDF files found in {args.pdf_folder}")

    measure_pdf_fn = partial(measure_pdf, n_runs=n_runs, test_cases=test_cases)
    results = lmap(measure_pdf_fn, test_pdf_paths)
    plot_results_and_save(results, test_cases, n_runs, test_pdf_paths, output_folder=args.output_folder)


def main():
    """Script entry point: parse the CLI arguments and run the benchmark."""
    measure_and_save_plot(parse_args(), list(DEFAULT_TEST_CASES))


if __name__ == "__main__":
    main()