Merge in RR/cv-analysis from refactor-evaluate to master
Squashed commit of the following:
commit cde03a492452610322f8b7d3eb804a51afb76d81
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Jul 22 12:37:36 2022 +0200
add optional show analysis metadata dict
commit fb8bb9e2afa7767f2560f865516295be65f97f20
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Jul 22 12:13:18 2022 +0200
add script to evaluate runtime per page for all cv-analysis operations for multiple PDFs
commit 721e823e2ec38aae3fea51d01e2135fc8f228d94
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Jul 22 10:30:31 2022 +0200
refactor
commit a453753cfa477e162e5902ce191ded61cb678337
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Jul 22 10:19:24 2022 +0200
add logic to transform result coordinates according to page rotation, update annotation script to use this logic
commit 71c09758d0fb763a2c38c6871e1d9bf51f2e7c41
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Jul 21 15:57:49 2022 +0200
introduce pipeline for image conversion, analysis and result formatting
commit aef252a41b9658dd0c4f55aa2d9f84de933586e0
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Jul 21 15:57:38 2022 +0200
introduce pipeline for image conversion, analysis and result formatting
48 lines
1.5 KiB
Python
48 lines
1.5 KiB
Python
import argparse
|
|
import json
|
|
from operator import itemgetter
|
|
from pathlib import Path
|
|
|
|
import fitz
|
|
|
|
from cv_analysis.server.pipeline import get_analysis_fn, make_analysis_pipeline
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line arguments of the annotation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads ``sys.argv[1:]``, which keeps
            the original zero-argument call working unchanged.

    Returns:
        argparse.Namespace with the attributes ``pdf_path``,
        ``output_folder``, ``type`` and ``verbose``.
    """
    parser = argparse.ArgumentParser(
        description="Run an analysis on a PDF and save a copy with the detected boxes drawn in."
    )
    parser.add_argument("pdf_path", help="path of the PDF file to analyse")
    parser.add_argument("output_folder", help="folder the annotated PDF is written to")
    parser.add_argument(
        "--type",
        "-t",
        choices=["table", "layout", "figure"],
        required=True,
        help="kind of analysis to run",
    )
    # --verbose and --silent share one destination, so the flag given last wins.
    parser.add_argument("--verbose", action="store_true", help="print the analysis results as JSON")
    parser.add_argument(
        "--silent",
        dest="verbose",
        action="store_false",
        help="do not print the analysis results (default)",
    )
    parser.set_defaults(verbose=False)
    return parser.parse_args(argv)
|
|
|
|
|
|
def analyse_annotate_save(pdf, analysis_type, output_path, verbose):
    """Analyse a PDF, draw every reported box onto it and save the result.

    Args:
        pdf: The PDF content; it is handed to the analysis pipeline and to
            ``fitz.open(stream=...)`` (the script passes the raw file bytes).
        analysis_type: Name used to look up the analysis function via
            ``get_analysis_fn``.
        output_path: Destination of the annotated PDF.
        verbose: When truthy, the analysis results are printed as indented
            JSON before annotating.
    """
    analysis_fn = get_analysis_fn(analysis_type)
    pipeline = make_analysis_pipeline(analysis_fn)
    # Materialise the results: they are printed (optionally) and iterated below.
    results = list(pipeline(pdf))

    if verbose:
        print(json.dumps(results, indent=2))

    # Pulls the four corner coordinates out of one box mapping, in draw order.
    corners_of = itemgetter("x1", "y1", "x2", "y2")

    with fitz.open(stream=pdf) as document:
        for page_result in results:
            # Each result carries the index of the page its boxes belong to.
            target_page = document[page_result["index"]]
            for box in page_result["bboxes"]:
                target_page.draw_rect(corners_of(box), color=(0.5, 0.7, 0.2), width=2)
        # Save while the document is still open.
        document.save(output_path)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read the input PDF, make sure the output folder
    # exists, then analyse, annotate and save.
    args = parse_args()

    # Read the input first, so a missing file fails before any folder is created.
    source_pdf = Path(args.pdf_path)
    pdf_bytes = source_pdf.read_bytes()

    Path(args.output_folder).mkdir(parents=True, exist_ok=True)
    # Output name: <input stem>_annotated_<analysis type>.pdf inside the output folder.
    output_path = f"{args.output_folder}/{source_pdf.stem}_annotated_{args.type}.pdf"

    analyse_annotate_save(pdf_bytes, args.type, output_path, args.verbose)
|