Merge in RR/cv-analysis from fix-response-coords to master
Squashed commit of the following:
commit 0c6178a564b48abc43f129f81d93091a277fc64a
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Oct 6 14:53:02 2022 +0200
update tests
commit 46ad8737593df976555e4f60db8dc7947784d46d
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Oct 6 14:40:25 2022 +0200
rename script
commit f541311d0aae22d5b76ba3c2580aada662812557
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Oct 6 14:40:11 2022 +0200
Response now returns the natural page index; update pdf2image to correct response coordinates.
30 lines
727 B
Python
30 lines
727 B
Python
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from cv_analysis.server.pipeline import get_analysis_pipeline
|
|
|
|
|
|
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line arguments of the analysis script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the behaviour of a plain
            ``parse_args()`` call.

    Returns:
        A namespace with two attributes: ``pdf`` (the positional path given
        on the command line) and ``type`` (one of ``"table"``, ``"layout"``
        or ``"figure"``).

    Raises:
        SystemExit: Raised by argparse when ``--type`` is missing, its value
            is not one of the allowed choices, or ``pdf`` is not supplied.
    """
    parser = argparse.ArgumentParser(
        description="Run an analysis pipeline on a PDF and store the results as JSON.",
    )
    parser.add_argument("pdf", help="path to the PDF file to analyse")
    parser.add_argument(
        "--type",
        "-t",
        choices=["table", "layout", "figure"],
        required=True,
        help="which analysis pipeline to run",
    )
    return parser.parse_args(argv)
|
|
|
|
|
|
def main():
    """Run the selected analysis pipeline on a PDF and dump the results as JSON.

    The PDF named on the command line is read as raw bytes and handed to the
    callable returned by ``get_analysis_pipeline`` for the requested type.
    The collected results are written to ``<stem>_<type>.json`` in the same
    directory as the input file.
    """
    args = parse_args()
    analysis_fn = get_analysis_pipeline(args.type)

    pdf_path = Path(args.pdf)
    pdf_bytes = pdf_path.read_bytes()

    # list() consumes whatever iterable the pipeline returns so that
    # json.dump receives a concrete, serialisable list.
    results = list(analysis_fn(pdf_bytes))

    # Sibling of the input file: same folder, stem suffixed with the type.
    output_path = pdf_path.with_name(f"{pdf_path.stem}_{args.type}.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)


if __name__ == "__main__":
    main()
|