97 lines
3.0 KiB
Python
97 lines
3.0 KiB
Python
import argparse
|
|
import time
|
|
from functools import partial
|
|
from pathlib import Path
|
|
|
|
import fitz
|
|
import numpy as np
|
|
from funcy import lmap
|
|
from matplotlib import pyplot as plt
|
|
|
|
from cv_analysis.server.pipeline import get_analysis_fn, make_analysis_pipeline
|
|
|
|
|
|
def parse_args():
    """Parse the command-line arguments of the runtime benchmark script.

    Positional arguments:
        pdf_folder: Folder containing the PDFs to evaluate.
        output_folder: Folder where the runtime plot is stored.
        n_runs: Number of runs per test, parsed as a positive integer.

    Returns:
        The ``argparse.Namespace`` holding ``pdf_folder``, ``output_folder``
        and ``n_runs``.

    Exits with a usage error (via ``argparse``) when ``n_runs`` is not an
    integer or is smaller than 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pdf_folder", help="Path to folder with PDFs to evaluate")
    parser.add_argument("output_folder", help="Path to folder where the Runtime plot should be stored")
    # Convert at the CLI boundary so a typo is reported as a usage error
    # instead of surfacing later as a traceback from int().
    parser.add_argument("n_runs", type=int, help="Number of runs per test")

    args = parser.parse_args()
    if args.n_runs < 1:
        # Zero or negative runs would produce empty timing samples.
        parser.error("n_runs must be at least 1")
    return args
|
|
|
|
|
|
def measure(fn, n_runs):
    """Wrap ``fn`` so that calling it yields timing statistics.

    Args:
        fn: Callable to benchmark. Its return value must be iterable; it is
            fully consumed so that lazy generators do their work inside the
            timed region.
        n_runs: Number of timed repetitions per call; must be at least 1.

    Returns:
        A function that takes the same arguments as ``fn`` and returns
        ``(mean, std)`` of the per-run durations in seconds.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1, since no samples would
            be collected and the statistics would be NaN.
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be at least 1, got {n_runs}")

    def run(*args, **kwargs):
        def _run():
            # perf_counter is monotonic and high resolution, so system clock
            # adjustments cannot distort (or make negative) a measurement.
            start = time.perf_counter()
            _results = list(fn(*args, **kwargs))  # Evaluate generators
            end = time.perf_counter()
            return end - start

        runtimes = [_run() for _ in range(n_runs)]
        return np.mean(runtimes), np.std(runtimes)

    return run
|
|
|
|
|
|
def run_tests(pdf, test_cases, n_runs):
    """Time the analysis pipeline of every test case on one PDF.

    Args:
        pdf: The PDF content handed to each analysis pipeline.
        test_cases: Names passed to ``get_analysis_fn`` to select the analysis.
        n_runs: Number of timed repetitions per test case.

    Returns:
        A list with one entry per test case, each being whatever the
        ``measure`` wrapper returns for that pipeline.
    """

    def time_case(case_name):
        # Build the pipeline for this operation, then wrap it with the timer.
        analysis_fn = get_analysis_fn(case_name)
        pipeline = make_analysis_pipeline(analysis_fn)
        timed_pipeline = measure(pipeline, n_runs)
        return timed_pipeline(pdf)

    return [time_case(case_name) for case_name in test_cases]
|
|
|
|
|
|
def to_ms_per_page(runtime, page_count):
    """Convert a total runtime in seconds into milliseconds per page.

    Args:
        runtime: Duration for the whole document, in seconds.
        page_count: Number of pages the duration is spread over.

    Returns:
        The per-page duration in milliseconds, rounded to zero decimals.
    """
    seconds_per_page = runtime / page_count
    return round(seconds_per_page * 1000, 0)
|
|
|
|
|
|
def measure_pdf(pdf_path, n_runs, test_cases=None):
    """Time every analysis test case on one PDF and report ms per page.

    Args:
        pdf_path: Path of the PDF file to benchmark.
        n_runs: Number of timed repetitions per test case.
        test_cases: Names of the analysis operations to time. When omitted,
            the module-level ``test_cases`` defined by the script entry point
            is used, which keeps the original two-argument call working.

    Returns:
        A ``(means, stds)`` pair of lists with one entry per test case, each
        value converted with ``to_ms_per_page``.
    """
    if test_cases is None:
        # Legacy call style: fall back to the global set under ``__main__``.
        test_cases = globals()["test_cases"]

    with open(pdf_path, "rb") as f:
        pdf = f.read()

    # Only the page count is needed; close the document again right away
    # instead of leaving it open for the duration of the benchmark.
    with fitz.open(stream=pdf) as document:
        page_count = document.page_count
    format_fn = partial(to_ms_per_page, page_count=page_count)

    means, std = zip(*run_tests(pdf, test_cases, n_runs=n_runs))
    means, std = lmap(format_fn, means), lmap(format_fn, std)
    return means, std


def plot_results_and_save(results, labels, n_runs, test_pdf_paths, output_folder=None):
    """Draw a grouped bar chart of the measured runtimes and save it as PNG.

    Args:
        results: One ``(means, stds)`` pair per PDF, aligned with
            ``test_pdf_paths``.
        labels: Tick labels for the x axis, one per analysis operation.
        n_runs: Number of runs per test; shown in the title and file name.
        test_pdf_paths: ``Path`` objects of the measured PDFs; their stems
            become the legend entries.
        output_folder: Folder to write the plot into (created if missing).
            When omitted, the module-level ``args.output_folder`` defined by
            the script entry point is used, which keeps the original
            four-argument call working.
    """
    if output_folder is None:
        # Legacy call style: fall back to the global set under ``__main__``.
        output_folder = globals()["args"].output_folder

    fig, ax = plt.subplots()
    width = 0.2
    x_labels = np.arange(len(labels))
    plt.xticks(ticks=x_labels, labels=labels, rotation=90)
    plt.grid(linestyle="dotted")

    for idx, (result, test_pdf_path) in enumerate(zip(results, test_pdf_paths)):
        # Shift each PDF's bars by one bar width so the groups sit side by side.
        x = x_labels + idx * width
        means, std = result
        bars = ax.bar(x, means, width, yerr=std, label=test_pdf_path.stem)
        ax.bar_label(bars)

    # Axis decoration does not depend on the individual PDFs; apply it once.
    ax.set_ylabel("ms/page")
    ax.set_xlabel("Cv-analysis operation")
    ax.set_title(f"Cv-analysis runtime estimation {n_runs=}")
    ax.legend(loc=0)

    output_dir = Path(output_folder)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"cv_analysis_runtime_{n_runs=}.png"
    # Save and close through the figure handle so that exactly this figure is
    # written and released, regardless of pyplot's notion of "current figure".
    fig.savefig(output_path, dpi=200, bbox_inches="tight", pad_inches=0.5)
    plt.close(fig)


def measure_and_save_plot(args, test_cases):
    """Benchmark every PDF in ``args.pdf_folder`` and store the runtime plot.

    Args:
        args: Parsed command-line arguments providing ``pdf_folder``,
            ``output_folder`` and ``n_runs``.
        test_cases: Names of the analysis operations to time; also used as
            the x-axis labels of the plot.
    """
    n_runs = int(args.n_runs)
    # glob() yields files in arbitrary order; sort so the bar and legend order
    # is stable from one invocation to the next.
    test_pdf_paths = sorted(Path(args.pdf_folder).glob("*.pdf"))
    results = [
        measure_pdf(pdf_path, n_runs=n_runs, test_cases=test_cases)
        for pdf_path in test_pdf_paths
    ]
    plot_results_and_save(
        results,
        test_cases,
        n_runs,
        test_pdf_paths,
        output_folder=args.output_folder,
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point. ``args`` and ``test_cases`` are deliberately bound
    # at module level: other functions in this file may look them up as
    # module globals, so they must not be moved into a local scope.
    args = parse_args()

    # Analysis operations to benchmark; also used as the plot's x-axis labels.
    test_cases = ["table", "layout", "figure"]

    measure_and_save_plot(args, test_cases)
|