from functools import singledispatch
from operator import itemgetter
from pathlib import Path
from typing import Any, Iterable, Mapping, Union

import fitz  # type: ignore
from kn_utils.logging import logger

from cv_analysis.utils.image_extraction import mirror_horizontal  # type: ignore

# Destination used by ``annotate_pdf`` when the caller passes no ``output_path``.
_DEFAULT_OUTPUT_PATH = "/tmp/annotated.pdf"


def annotate_pdf(
    pdf: Union[str, bytes, Path],
    annotations: Iterable[Mapping[str, Any]],
    output_path: Union[str, Path, None] = None,
) -> None:
    """Draw the given annotations onto a PDF and save the result.

    Args:
        pdf: The source document, either as raw bytes or as a path
            (``str`` or ``Path``) to a file that is read in binary mode.
        annotations: One mapping per page to annotate. Each mapping must
            contain ``"pageNum"``, which is used directly as the index into
            the opened document, and is forwarded unchanged to
            ``annotate_page``.
        output_path: Where to save the annotated document. Any falsy value
            (``None``, ``""``) falls back to ``/tmp/annotated.pdf``.

    Raises:
        TypeError: If ``pdf`` is not ``bytes``, ``str`` or ``Path``.
        KeyError: If a page mapping has no ``"pageNum"`` entry.
    """
    pdf_bytes = provide_byte_stream(pdf)
    output_path = output_path or _DEFAULT_OUTPUT_PATH

    with fitz.open(stream=pdf_bytes) as pdf_handle:
        for page_annotations in annotations:
            index = page_annotations["pageNum"]
            annotate_page(pdf_handle[index], page_annotations)
        pdf_handle.save(output_path)
        logger.info(f"Annotated PDF saved to {output_path}")


def annotate_page(page: fitz.Page, prediction: Mapping[str, Any]) -> fitz.Page:
    """Draw one page's predicted boxes and table lines onto ``page``.

    Args:
        page: The page to draw on; it is modified in place.
        prediction: Mapping with two optional entries:

            * ``"boxes"``: iterable of mappings, each with ``"label"``,
              ``"probability"``, ``"uuid"`` and a nested ``"box"`` mapping
              holding ``"x1"``, ``"y1"``, ``"x2"``, ``"y2"``.
            * ``"tableLines"``: iterable of mappings holding ``"x1"``,
              ``"y1"``, ``"x2"``, ``"y2"``.

            A missing entry is treated as empty.

    Returns:
        The same ``page`` object that was passed in.

    Raises:
        KeyError: If a box or table line lacks one of the keys listed above.
    """
    corners_of = itemgetter("x1", "y1", "x2", "y2")
    box_fields_of = itemgetter("label", "probability", "uuid")

    for box in prediction.get("boxes", []):
        x0, y0, x1, y1 = corners_of(box["box"])
        label, probability, uuid = box_fields_of(box)

        page.draw_rect(fitz.Rect(x0, y0, x1, y1), color=(0, 0, 1), width=2)
        # Put the caption 5 units above the first corner of the rectangle.
        page.insert_text(
            (x0, y0 - 5),
            f"{label} ({probability:.2f}), {uuid}",
            fontsize=12,
            color=(0.4, 0.4, 1),
        )

    # The page rectangle does not change while drawing, so read it once.
    page_height = page.bound()[3]
    for line in prediction.get("tableLines", []):
        # NOTE(review): unlike the boxes above, table-line coordinates are
        # passed through mirror_horizontal before drawing; presumably they
        # use a flipped vertical axis. Confirm against the producer.
        mirrored = mirror_horizontal(corners_of(line), page_height=page_height)
        page.draw_line(
            tuple(mirrored[:2]),
            tuple(mirrored[2:]),
            color=(1, 0, 0.5),
            width=1,
        )

    return page


@singledispatch
def provide_byte_stream(pdf: Union[bytes, Path, str]) -> bytes:
    """Return the content of ``pdf`` as bytes.

    ``bytes`` input is returned unchanged; ``str`` and ``Path`` input is
    treated as a file path and read in binary mode.

    Raises:
        TypeError: If ``pdf`` is of any other type. Failing here keeps the
            error next to its cause instead of handing ``None`` to the PDF
            library.
    """
    raise TypeError(
        f"Unsupported PDF input type {type(pdf).__name__!r}; "
        "expected bytes, str or pathlib.Path"
    )


@provide_byte_stream.register(bytes)
def _from_bytes(pdf: bytes) -> bytes:
    """Pass an in-memory PDF through unchanged."""
    return pdf


@provide_byte_stream.register(str)
@provide_byte_stream.register(Path)
def _from_path(pdf: Union[str, Path]) -> bytes:
    """Read the file at ``pdf`` and return its full binary content."""
    with open(pdf, "rb") as pdf_file:
        return pdf_file.read()