cv-analysis-service/scripts/measure_runtimes.py
Julius Unverfehrt a871fa3bd3 Pull request #19: Refactor evaluate
Merge in RR/cv-analysis from refactor-evaluate to master

Squashed commit of the following:

commit cde03a492452610322f8b7d3eb804a51afb76d81
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 22 12:37:36 2022 +0200

    add optional show analysis metadata dict

commit fb8bb9e2afa7767f2560f865516295be65f97f20
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 22 12:13:18 2022 +0200

    add script to evaluate runtime per page for all cv-analysis operations for multiple PDFs

commit 721e823e2ec38aae3fea51d01e2135fc8f228d94
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 22 10:30:31 2022 +0200

    refactor

commit a453753cfa477e162e5902ce191ded61cb678337
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 22 10:19:24 2022 +0200

    add logic to transform result coordinates according to page rotation, update annotation script to use this logic

commit 71c09758d0fb763a2c38c6871e1d9bf51f2e7c41
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Thu Jul 21 15:57:49 2022 +0200

    introduce pipeline for image conversion, analysis and result formatting

commit aef252a41b9658dd0c4f55aa2d9f84de933586e0
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Thu Jul 21 15:57:38 2022 +0200

    introduce pipeline for image conversion, analysis and result formatting
2022-07-22 15:11:40 +02:00

97 lines
3.0 KiB
Python

import argparse
import time
from functools import partial
from pathlib import Path
import fitz
import numpy as np
from funcy import lmap
from matplotlib import pyplot as plt
from cv_analysis.server.pipeline import make_analysis_pipeline, get_analysis_fn
def parse_args(argv=None):
    """Parse the command-line arguments for the runtime-measurement script.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            (via ``parse_args(None)``), so existing callers are unaffected.

    Returns:
        argparse.Namespace with ``pdf_folder``, ``output_folder`` and
        ``n_runs`` (as int).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pdf_folder", help="Path to folder with PDFs to evaluate")
    parser.add_argument("output_folder", help="Path to folder where the Runtime plot should be stored")
    # Convert here so callers do not need to repeat str -> int conversion;
    # int(args.n_runs) downstream still works on an int.
    parser.add_argument("n_runs", type=int, help="Number of runs per test")
    return parser.parse_args(argv)
def measure(fn, n_runs):
    """Wrap *fn* so that calling the wrapper returns runtime statistics.

    The returned callable executes ``fn(*args, **kwargs)`` ``n_runs`` times,
    fully consuming any generator the call returns, and reports the mean and
    standard deviation of the elapsed wall-clock time in seconds.

    Args:
        fn: Callable to benchmark; may return a lazy iterable.
        n_runs: Number of timed repetitions.

    Returns:
        A callable with the same parameters as *fn* that returns
        ``(mean_seconds, std_seconds)``.
    """
    def run(*args, **kwargs):
        def _run():
            # perf_counter is monotonic and higher-resolution than
            # time.time(), so it is the correct clock for benchmarking.
            start = time.perf_counter()
            list(fn(*args, **kwargs))  # Force evaluation of lazy generators.
            return time.perf_counter() - start

        runtimes = [_run() for _ in range(n_runs)]
        return np.mean(runtimes), np.std(runtimes)

    return run
def run_tests(pdf, test_cases, n_runs):
    """Time every cv-analysis pipeline in *test_cases* against *pdf*.

    Args:
        pdf: Raw PDF bytes fed into each analysis pipeline.
        test_cases: Names of the cv-analysis operations to benchmark.
        n_runs: Number of timed repetitions per operation.

    Returns:
        A list of ``(mean_runtime, runtime_std)`` tuples, one per test case.
    """
    def timed_case(case):
        pipeline = make_analysis_pipeline(get_analysis_fn(case))
        return measure(pipeline, n_runs)(pdf)

    return [timed_case(case) for case in test_cases]
def to_ms_per_page(runtime, page_count):
    """Convert a total runtime in seconds to milliseconds per page.

    Args:
        runtime: Total elapsed time in seconds.
        page_count: Number of pages processed in that time.

    Returns:
        Milliseconds per page as a float rounded to zero decimals.
    """
    return round(runtime / page_count * 1000, 0)
def measure_pdf(pdf_path, n_runs, test_cases=None):
    """Measure cv-analysis runtimes (ms/page) for a single PDF.

    Args:
        pdf_path: Path to the PDF file to benchmark.
        n_runs: Number of timed repetitions per test case.
        test_cases: Operations to benchmark. Defaults to the module-level
            ``test_cases`` global (set in the ``__main__`` block) so existing
            callers keep working; new callers should pass it explicitly.

    Returns:
        ``(means, stds)``: two lists of rounded ms/page values, one entry per
        test case.
    """
    if test_cases is None:
        # BUGFIX: the original read the global `test_cases` implicitly, which
        # only exists when run as a script. The parameter makes the
        # dependency explicit while staying backward-compatible.
        test_cases = globals()["test_cases"]
    with open(pdf_path, "rb") as f:
        pdf = f.read()
    page_count = fitz.open(stream=pdf).page_count
    # Normalize every measurement to ms/page so PDFs of different lengths
    # are comparable.
    format_fn = partial(to_ms_per_page, page_count=page_count)
    means, stds = zip(*run_tests(pdf, test_cases, n_runs=n_runs))
    return lmap(format_fn, means), lmap(format_fn, stds)
def plot_results_and_save(results, labels, n_runs, test_pdf_paths, output_folder=None):
    """Render a grouped bar chart of ms/page per operation and save it as PNG.

    Args:
        results: Per-PDF ``(means, stds)`` tuples as returned by measure_pdf.
        labels: X-axis labels, one per cv-analysis operation.
        n_runs: Number of runs (shown in the title and output filename).
        test_pdf_paths: Paths of the measured PDFs; stems become legend labels.
        output_folder: Destination directory. Defaults to the global CLI
            ``args.output_folder`` for backward compatibility with the
            original implicit-global behavior.
    """
    if output_folder is None:
        # BUGFIX: the original read the global `args` implicitly, which only
        # exists when run as a script. The parameter makes the dependency
        # explicit while staying backward-compatible.
        output_folder = args.output_folder
    _, ax = plt.subplots()
    width = 0.2
    x_labels = np.arange(len(labels))
    plt.xticks(ticks=x_labels, labels=labels, rotation=90)
    plt.grid(linestyle="dotted")
    for idx, ((means, std), test_pdf_path) in enumerate(zip(results, test_pdf_paths)):
        # Offset each PDF's bars by idx*width so groups sit side by side.
        x = x_labels + idx * width
        bars = ax.bar(x, means, width, yerr=std, label=f"{test_pdf_path.stem}")
        ax.bar_label(bars)
    ax.set_ylabel("ms/page")
    ax.set_xlabel("Cv-analysis operation")
    ax.set_title(f"Cv-analysis runtime estimation {n_runs=}")
    ax.legend(loc=0)
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    output_path = f"{output_folder}/cv_analysis_runtime_{n_runs=}.png"
    plt.savefig(output_path, dpi=200, bbox_inches="tight", pad_inches=0.5)
    plt.close()
def measure_and_save_plot(args, test_cases):
    """Benchmark every PDF in ``args.pdf_folder`` and save a runtime bar chart.

    Args:
        args: Parsed CLI namespace with pdf_folder, output_folder, n_runs.
        test_cases: Names of the cv-analysis operations to benchmark.
    """
    n_runs = int(args.n_runs)
    pdf_paths = list(Path(args.pdf_folder).glob("*.pdf"))
    results = [measure_pdf(pdf_path, n_runs=n_runs) for pdf_path in pdf_paths]
    plot_results_and_save(results, test_cases, n_runs, pdf_paths)
if __name__ == "__main__":
    # NOTE(review): `test_cases` and `args` are module-level globals that
    # measure_pdf and plot_results_and_save read directly, so these
    # assignments must stay at module scope under this guard.
    test_cases = ["table", "layout", "figure"]
    args = parse_args()
    measure_and_save_plot(args, test_cases)