Merge in RR/cv-analysis from pdf2image to master
Squashed commit of the following:
commit 1353f54d2dceb0a79b1f81bfa2c035f5a454275a
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Aug 10 09:07:31 2022 +0200
add deRotation and transformation via rectanglePlus
commit 51459dbf57a86e3eac66ec0da02de40dc1b68796
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 9 08:53:50 2022 +0200
add derotation and to pdf coords transformation to cv-analysis output
commit 733991e2f5a4664205b2f7cc756cebcbc9ee3930
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Aug 8 15:15:13 2022 +0200
update pipeline with derotation logic WIP
53 lines
1.7 KiB
Python
53 lines
1.7 KiB
Python
import argparse
|
|
import json
|
|
from operator import itemgetter
|
|
from pathlib import Path
|
|
|
|
import fitz
|
|
from funcy import lmap
|
|
|
|
from cv_analysis.server.pipeline import get_analysis_fn, make_analysis_pipeline, get_analysis_pipeline
|
|
from cv_analysis.utils.display import show_image_mpl
|
|
from pdf2img.extraction import extract_images
|
|
from pdf2img.get_rectangles import parse_to_image_rectangles
|
|
|
|
|
|
def parse_args():
    """Parse the command-line options of the annotation script.

    Positional arguments are ``pdf_path`` and ``output_folder``. The
    ``--type``/``-t`` option is mandatory and restricted to ``table``,
    ``layout`` or ``figure``. ``--verbose`` and ``--silent`` both write to
    the ``verbose`` attribute, which is ``False`` unless ``--verbose`` is
    given.

    Returns:
        argparse.Namespace: The values parsed from ``sys.argv``.
    """
    analysis_types = ["table", "layout", "figure"]

    cli = argparse.ArgumentParser()
    for positional in ("pdf_path", "output_folder"):
        cli.add_argument(positional)
    cli.add_argument("--type", "-t", choices=analysis_types, required=True)

    # Both switches store into ``verbose``; when both are passed, the one
    # appearing later on the command line takes effect.
    cli.add_argument("--verbose", action="store_true")
    cli.add_argument("--silent", action="store_false", dest="verbose")
    cli.set_defaults(verbose=False)

    namespace = cli.parse_args()
    return namespace
|
|
|
|
|
|
def analyse_annotate_save(pdf, analysis_type, output_path, verbose):
    """Run an analysis pipeline on a PDF and save a copy with the results outlined.

    Args:
        pdf: PDF content; passed to the pipeline and to ``fitz.open`` as
            ``stream`` (the script entry point supplies the raw file bytes).
        analysis_type: Name forwarded to ``get_analysis_pipeline``.
        output_path: Destination passed to the document's ``save`` method.
        verbose: When truthy, the results are printed as indented JSON
            before any drawing happens.
    """
    run_pipeline = get_analysis_pipeline(analysis_type)
    # Materialise the results up front: they are printed and iterated below.
    detections = list(run_pipeline(pdf))

    if verbose:
        print(json.dumps(detections, indent=2))

    read_corners = itemgetter("x0", "y0", "x1", "y1")
    outline_colour = (0.5, 0.7, 0.2)

    with fitz.open(stream=pdf) as document:
        for detection in detections:
            # presumably pageInfo.number is the 0-based page index — TODO confirm
            target_page = document[detection["pageInfo"]["number"]]
            corners = read_corners(detection["boundingBoxScreen"])
            target_page.draw_rect(fitz.Rect(*corners), color=outline_colour, width=2)
        # Save once, after every rectangle has been drawn, while the
        # document is still open.
        document.save(output_path)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read the input PDF, make sure the output folder
    # exists, then analyse the document and write the annotated copy.
    args = parse_args()

    # The input is read before the output folder is created, so a missing
    # input file fails without leaving an empty folder behind.
    pdf_bytes = Path(args.pdf_path).read_bytes()

    destination = Path(args.output_folder)
    destination.mkdir(parents=True, exist_ok=True)
    output_path = f"{args.output_folder}/{Path(args.pdf_path).stem}_annotated_{args.type}.pdf"

    analyse_annotate_save(
        pdf=pdf_bytes,
        analysis_type=args.type,
        output_path=output_path,
        verbose=args.verbose,
    )
|