"""Send the pages of a PDF to an analysis endpoint and draw the results.

The script renders each page of the input PDF to an image, submits the
gzip-compressed images to the configured webserver endpoint for the selected
operation, and draws the returned cell rectangles onto a copy of the PDF.
"""
import argparse
import gzip
from operator import itemgetter
from typing import List

import fitz
import pdf2image
from funcy import lmap, compose, pluck

from pyinfra.default_objects import get_component_factory

from cv_analysis.config import CONFIG
from incl.pyinfra.test.utils.image import image_to_bytes


def parse_args():
    """Parse the command-line arguments of the script.

    Returns:
        The parsed namespace with the attributes ``pdf_path``, ``operation``
        (either ``"figure_detection"`` or ``"table_parsing"``) and
        ``result_path``. All three options are required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pdf_path", "-p", required=True)
    parser.add_argument(
        "--operation",
        "-o",
        choices=["figure_detection", "table_parsing"],
        required=True,
    )
    parser.add_argument("--result_path", "-r", required=True)
    return parser.parse_args()


def request_metadatas(dpi, n_metadata):
    """Build the per-item request metadata.

    Args:
        dpi: Resolution value stored under the ``"dpi"`` key of every entry.
        n_metadata: Number of metadata dicts to create.

    Returns:
        A list of ``n_metadata`` independent ``{"dpi": dpi}`` dicts.
    """
    # Exactly n_metadata entries are produced: the caller passes the number
    # of page images, and starting the range at 1 would yield one entry too
    # few, leaving one image without matching metadata.
    return [{"dpi": dpi} for _ in range(n_metadata)]


def draw_cells_on_page(cells: List[dict], page):
    """Draw one rectangle per cell onto ``page``.

    Args:
        cells: Dicts that each provide the keys ``"x"``, ``"y"``, ``"width"``
            and ``"height"``.
        page: Object exposing ``draw_rect``; here a page of the opened
            ``fitz`` document.
    """

    def format_xywh_to_x0y0x1y1(rect):
        # Convert (x, y, width, height) into corner coordinates.
        x, y, w, h = rect
        return x, y, x + w, y + h

    rects = map(itemgetter("x", "y", "width", "height"), cells)
    rects = map(format_xywh_to_x0y0x1y1, rects)

    for rect in rects:
        page.draw_rect(rect, color=(0.3, 0.7, 0.1), width=2, overlay=True)


def annotate_results_on_pdf(results, pdf_path, result_path):
    """Draw the cells found in ``results`` onto the PDF and save a copy.

    Args:
        results: Iterable of mappings, one per page, each with a
            ``"metadata"`` entry. Entries with falsy metadata are skipped;
            otherwise ``metadata["cells"]`` is drawn on the page.
        pdf_path: Path of the PDF to open.
        result_path: Path the annotated PDF is saved to.
    """
    opened_pdf = fitz.open(pdf_path)
    try:
        metadata_per_page = pluck("metadata", results)

        # zip stops at the shorter sequence, so pages without a result are
        # left untouched.
        for page, metadata in zip(opened_pdf, metadata_per_page):
            if metadata:
                draw_cells_on_page(metadata["cells"], page)

        opened_pdf.save(result_path)
    finally:
        # Release the document handle even if drawing or saving fails.
        opened_pdf.close()


def main(args):
    """Run the selected operation on a PDF and write the annotated result.

    Args:
        args: Namespace providing ``pdf_path``, ``operation`` and
            ``result_path`` (see ``parse_args``).
    """
    dpi = 200

    # compose applies right-to-left: each page image is first serialized to
    # bytes and then gzip-compressed.
    images = lmap(
        compose(gzip.compress, image_to_bytes),
        pdf2image.convert_from_path(args.pdf_path, dpi=dpi),
    )

    submit_endpoint = f"http://{CONFIG.webserver.host}:{CONFIG.webserver.port}/{args.operation}"
    pipeline = get_component_factory(CONFIG).get_pipeline(submit_endpoint)

    results = list(pipeline(data=images, metadata=request_metadatas(dpi, len(images))))

    annotate_results_on_pdf(results, args.pdf_path, args.result_path)


if __name__ == "__main__":
    main(parse_args())