Merge in RR/cv-analysis from image-service-compat to master
Squashed commit of the following:
commit 397d12a96a6b78de762f7b3a80a72427f5f51e97
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 16 16:14:40 2022 +0200
update pdf2image, adjust response format for table-parsing & figure-detection
commit f2061bda8d25d64de974e97f36148dea29af50d9
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Aug 15 08:56:39 2022 +0200
add script to save figure detection data that can be used for image-service pipeline script
28 lines
633 B
Python
28 lines
633 B
Python
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from cv_analysis.server.pipeline import get_analysis_pipeline
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line arguments of the figure-detection script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is what a plain ``parse_args()`` call does.

    Returns:
        argparse.Namespace with a single attribute ``pdf``: the path of the
        PDF file, as the string given on the command line.
    """
    parser = argparse.ArgumentParser(
        description="Run figure detection on a PDF and save the results as JSON.",
    )
    parser.add_argument("pdf", help="path to the PDF file to analyse")
    return parser.parse_args(argv)
|
|
|
|
|
|
def main():
    """Run the "figure" analysis pipeline on a PDF and dump the results as JSON.

    The PDF path is taken from the command line (see ``parse_args``). The
    pipeline output is materialised into a list and written, indented by two
    spaces, to ``<pdf stem>_figures.json`` in the same directory as the PDF.
    """
    args = parse_args()
    pdf_path = Path(args.pdf)

    # The pipeline is created before the PDF is read, as in the original flow.
    detect_figures = get_analysis_pipeline("figure")

    pdf_bytes = pdf_path.read_bytes()

    # list() consumes the pipeline output so json.dump receives a concrete list.
    results = list(detect_figures(pdf_bytes))

    # with_name keeps the PDF's directory and only swaps the file name, which
    # avoids hand-joining path components with "/" (e.g. "//x_figures.json"
    # for a PDF located in the filesystem root).
    output_path = pdf_path.with_name(f"{pdf_path.stem}_figures.json")

    # The file is only written, never read back, so plain "w" is sufficient.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)


if __name__ == "__main__":
    main()