2024-05-15 16:38:51 +02:00

73 lines
2.0 KiB
Python

from functools import singledispatch
from operator import itemgetter
from pathlib import Path
from typing import Union
import fitz # type: ignore
from kn_utils.logging import logger
from cv_analysis.utils.image_extraction import mirror_horizontal # type: ignore
def annotate_pdf(
    pdf: Union[str, bytes, Path],
    annotations,
    output_path: Union[str, Path, None] = None,
):
    """Draw per-page annotations onto a PDF and save the result.

    Args:
        pdf: The source document, either as raw bytes or as a path
            (``str`` or ``Path``) that is read via ``provide_byte_stream``.
        annotations: Iterable of per-page entries. Each entry must have a
            ``"pageNum"`` key and is forwarded unchanged to
            ``annotate_page``.
        output_path: Where to save the annotated document. A falsy value
            (the default ``None``) selects ``/tmp/annotated.pdf``.
    """
    if not output_path:
        output_path = "/tmp/annotated.pdf"

    with fitz.open(stream=provide_byte_stream(pdf)) as document:
        for entry in annotations:
            # NOTE(review): "pageNum" is used as-is as the page index --
            # confirm that the producer emits zero-based page numbers.
            annotate_page(document[entry["pageNum"]], entry)
        document.save(output_path)

    logger.info(f"Annotated PDF saved to {output_path}")
def annotate_page(page: fitz.Page, prediction):
    """Draw the predicted boxes and table lines of one page onto ``page``.

    Args:
        page: The page to draw on; it is modified in place.
        prediction: Mapping with optional ``"boxes"`` and ``"tableLines"``
            entries. Each box must provide ``"box"`` (holding ``x1``,
            ``y1``, ``x2``, ``y2``), ``"label"``, ``"probability"`` and
            ``"uuid"``. Each table line must provide ``x1``, ``y1``,
            ``x2`` and ``y2``.

    Returns:
        The same ``page`` object that was passed in.
    """
    for box in prediction.get("boxes", []):
        x0, y0, x1, y1 = itemgetter("x1", "y1", "x2", "y2")(box["box"])
        label, probability, uuid = itemgetter("label", "probability", "uuid")(box)

        page.draw_rect(fitz.Rect(x0, y0, x1, y1), color=(0, 0, 1), width=2)
        # The caption is anchored at the box's first corner, shifted by -5
        # along the y axis.
        page.insert_text(
            (x0, y0 - 5),
            f"{label} ({probability:.2f}), {uuid}",
            fontsize=12,
            color=(0.4, 0.4, 1),
        )

    # The fourth component of the page bounds is passed on as the page
    # height; it does not depend on the individual line, so fetch it once.
    page_height = page.bound()[3]
    for line in prediction.get("tableLines", []):
        raw_coords = itemgetter("x1", "y1", "x2", "y2")(line)
        # NOTE(review): only table lines go through mirror_horizontal, boxes
        # do not -- presumably the two use different coordinate origins;
        # confirm against the producer of `prediction`.
        mirrored = mirror_horizontal(raw_coords, page_height=page_height)
        page.draw_line(
            tuple(mirrored[:2]),
            tuple(mirrored[2:]),
            color=(1, 0, 0.5),
            width=1,
        )

    return page
@singledispatch
def provide_byte_stream(pdf: Union[bytes, Path, str]) -> bytes:
    """Return the content of a PDF as bytes, whatever form it is given in.

    Dispatches on the type of ``pdf``: ``bytes`` are returned unchanged,
    while ``str`` and ``Path`` are treated as file paths and read in binary
    mode. This fallback only runs for types without a registered
    implementation.

    Args:
        pdf: Raw PDF bytes, or a path to a PDF file.

    Returns:
        The PDF content as ``bytes``.

    Raises:
        TypeError: If ``pdf`` is of an unsupported type. Failing here gives
            a clear error instead of silently handing ``None`` to the
            caller.
        OSError: If a path is given and the file cannot be opened or read.
    """
    raise TypeError(
        f"Unsupported PDF source type {type(pdf).__name__!r}; "
        "expected bytes, str or pathlib.Path"
    )


@provide_byte_stream.register(bytes)
def _(pdf):
    """Bytes are already a byte stream; hand them back unchanged."""
    return pdf


@provide_byte_stream.register(str)
@provide_byte_stream.register(Path)
def _(pdf):
    """Treat ``pdf`` as a file path and return the file's binary content."""
    with open(pdf, "rb") as pdf_file:
        return pdf_file.read()