cv-analysis-service/scripts/measure_runtimes.py
2024-04-29 12:09:44 +02:00

97 lines
3.0 KiB
Python

import argparse
import time
from functools import partial
from pathlib import Path
import fitz
import numpy as np
from funcy import lmap
from matplotlib import pyplot as plt
from cv_analysis.server.pipeline import get_analysis_fn, make_analysis_pipeline
def parse_args():
    """Parse the command-line arguments of the runtime measurement script.

    Returns:
        argparse.Namespace with the positional arguments ``pdf_folder`` and
        ``output_folder`` (strings) and ``n_runs`` (int).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pdf_folder", help="Path to folder with PDFs to evaluate")
    parser.add_argument("output_folder", help="Path to folder where the Runtime plot should be stored")
    # Convert at parse time so a non-numeric value yields an argparse usage
    # error instead of a ValueError traceback further down the script.
    parser.add_argument("n_runs", type=int, help="Number of runs per test")
    return parser.parse_args()
def measure(fn, n_runs):
    """Wrap ``fn`` so that calling the wrapper times it instead of returning its result.

    Args:
        fn: Callable whose return value is iterable. The iterable is fully
            consumed inside the timed region so that lazy generators
            actually do their work while the clock is running.
        n_runs: Number of timed repetitions performed per wrapper call.

    Returns:
        A function taking the same arguments as ``fn`` and returning a tuple
        ``(mean, std)`` of the elapsed seconds over ``n_runs`` repetitions.
    """

    def run(*args, **kwargs):
        def _run():
            # perf_counter is the clock intended for measuring intervals: it
            # is monotonic and uses the highest resolution available, unlike
            # time.time(), which follows (adjustable) wall-clock time.
            start = time.perf_counter()
            list(fn(*args, **kwargs))  # Evaluate generators
            return time.perf_counter() - start

        runtimes = [_run() for _ in range(n_runs)]
        return np.mean(runtimes), np.std(runtimes)

    return run
def run_tests(pdf, test_cases, n_runs):
    """Time the analysis pipeline of every test case on a single PDF.

    Args:
        pdf: Input handed unchanged to each timed analysis pipeline.
        test_cases: Iterable of names passed to ``get_analysis_fn``.
        n_runs: Number of timed repetitions per test case.

    Returns:
        A list holding one ``measure`` result (mean, std) per test case, in
        the order of ``test_cases``.
    """
    timings = []
    for case in test_cases:
        pipeline = make_analysis_pipeline(get_analysis_fn(case))
        timed_pipeline = measure(pipeline, n_runs)
        timings.append(timed_pipeline(pdf))
    return timings
def to_ms_per_page(runtime, page_count):
    """Convert a total runtime into milliseconds per page, rounded to a whole number.

    Args:
        runtime: Total runtime; multiplied by 1000 to obtain milliseconds.
        page_count: Number of pages the runtime is spread over.

    Returns:
        ``runtime / page_count * 1000`` rounded to zero decimal places.
    """
    per_page = runtime / page_count
    return round(per_page * 1000, 0)
def measure_pdf(pdf_path, n_runs):
    """Measure the per-page runtime of every test case on one PDF file.

    Args:
        pdf_path: Path of the PDF file to read.
        n_runs: Number of timed repetitions per test case.

    Returns:
        Tuple ``(means, std)``: two lists with one entry per test case, each
        converted by ``to_ms_per_page``.

    Note:
        The test cases come from the module-level name ``test_cases``, not
        from a parameter. In the visible part of this file that name is only
        assigned under the ``__main__`` guard, so calling this function from
        an importing module requires defining it first.
    """
    with open(pdf_path, "rb") as f:
        pdf = f.read()

    # The document is opened only to learn the page count; close it right
    # away instead of leaving the handle to the garbage collector.
    doc = fitz.open(stream=pdf)
    try:
        page_count = doc.page_count
    finally:
        doc.close()

    format_fn = partial(to_ms_per_page, page_count=page_count)
    # run_tests yields one (mean, std) pair per test case; transpose the
    # pairs into one sequence of means and one of standard deviations.
    means, std = zip(*run_tests(pdf, test_cases, n_runs=n_runs))
    return lmap(format_fn, means), lmap(format_fn, std)
def plot_results_and_save(results, labels, n_runs, test_pdf_paths, output_folder=None):
    """Draw a grouped bar chart of the measured runtimes and save it as a PNG.

    Args:
        results: One ``(means, std)`` pair per PDF; each element holds one
            value per entry of ``labels``.
        labels: Tick labels of the x axis (one per measured operation).
        n_runs: Number of runs per test; shown in the title and embedded in
            the output file name.
        test_pdf_paths: Paths of the measured PDFs, parallel to ``results``;
            each path's stem becomes a legend entry.
        output_folder: Directory the plot is written to (created if
            missing). When omitted, the module-level ``args.output_folder``
            is used, which keeps the previous four-argument call working.
    """
    if output_folder is None:
        # Backward compatibility: earlier callers relied on the script-level
        # ``args`` namespace instead of passing the folder explicitly.
        output_folder = args.output_folder
    output_dir = Path(output_folder)

    series = list(zip(results, test_pdf_paths))

    fig, ax = plt.subplots()
    # Keep the original bar width of 0.2 for up to four PDFs; with more
    # series shrink the bars so a group never spills into the bars that
    # belong to the next tick (ticks are one unit apart).
    width = min(0.2, 0.8 / max(len(series), 1))
    x_labels = np.arange(len(labels))
    plt.xticks(ticks=x_labels, labels=labels, rotation=90)
    plt.grid(linestyle="dotted")

    for idx, (result, test_pdf_path) in enumerate(series):
        # Shift each PDF's bars sideways so the series sit next to each other.
        x = x_labels + idx * width
        means, std = result
        bars = ax.bar(x, means, width, yerr=std, label=f"{test_pdf_path.stem}")
        ax.bar_label(bars)

    ax.set_ylabel("ms/page")
    ax.set_xlabel("Cv-analysis operation")
    ax.set_title(f"Cv-analysis runtime estimation {n_runs=}")
    ax.legend(loc=0)

    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"cv_analysis_runtime_{n_runs=}.png"
    plt.savefig(output_path, dpi=200, bbox_inches="tight", pad_inches=0.5)
    plt.close(fig)
def measure_and_save_plot(args, test_cases):
    """Measure every PDF in ``args.pdf_folder`` and save the runtime plot.

    Args:
        args: Namespace providing ``pdf_folder`` and ``n_runs``.
        test_cases: Names of the measured operations; used as the x-axis
            labels of the plot.
    """
    n_runs = int(args.n_runs)
    # glob() yields entries in file-system order; sort them so the order of
    # bars and legend entries is reproducible across runs and machines.
    test_pdf_paths = sorted(Path(args.pdf_folder).glob("*.pdf"))
    results = [measure_pdf(pdf_path, n_runs=n_runs) for pdf_path in test_pdf_paths]
    plot_results_and_save(results, test_cases, n_runs, test_pdf_paths)
if __name__ == "__main__":
    # Script entry point. ``args`` and ``test_cases`` must stay module-level
    # names: other functions in this file read them as globals.
    args = parse_args()
    test_cases = ["table", "layout", "figure"]
    measure_and_save_plot(args, test_cases)