funktion: Arbeit In Durchfuehrung: Hinzufuegen von Annotations Logik
This commit is contained in:
parent
8de913840f
commit
0f0fe516d0
15
scripts/parse_pdf.py
Normal file
15
scripts/parse_pdf.py
Normal file
@ -0,0 +1,15 @@
|
||||
import json
|
||||
|
||||
from cv_analysis.server.pipeline import make_image_analysis_pipeline
|
||||
from cv_analysis.table_inference import infer_lines
|
||||
|
||||
# Build the analysis pipeline around the `infer_lines` inference function.
pipe = make_image_analysis_pipeline(infer_lines)

# FIXME: Implement argparsing
# Until argument parsing exists, the input locations are fixed here.
PDF_PATH = "/home/junverfehrt/Documents/rosario_test_file.pdf"
VLP_OUTPUT_PATH = "/home/junverfehrt/Documents/rosario_test_file_vlp.json"

# Read both inputs through context managers so the file handles are closed
# deterministically instead of being left to the garbage collector.
with open(PDF_PATH, "rb") as pdf_file:
    pdf_bytes = pdf_file.read()

# JSON is UTF-8 by specification; do not depend on the locale's encoding.
with open(VLP_OUTPUT_PATH, "r", encoding="utf-8") as vlp_file:
    vlp_output = json.load(vlp_file)

# Materialize the pipeline output so the complete result can be printed.
best_result = list(pipe(data={"pdf": pdf_bytes, "vlp_output": vlp_output}))

print(best_result)
|
||||
58
src/cv_analysis/utils/annotate.py
Normal file
58
src/cv_analysis/utils/annotate.py
Normal file
@ -0,0 +1,58 @@
|
||||
from functools import singledispatch
|
||||
from operator import itemgetter
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import fitz
|
||||
from kn_utils.logging import logger
|
||||
|
||||
|
||||
def annotate_pdf(
    pdf: Union[str, bytes, Path],
    predictions,
    output_path: Union[str, Path, None] = None,
):
    """Draw the predicted boxes into a PDF and save the annotated copy.

    Args:
        pdf: The document, either as raw bytes or as a path to read from.
        predictions: Iterable of per-page predictions. Each item is indexed
            with ``"page_idx"`` to select the page and is then handed to
            ``annotate_page``.
        output_path: Destination of the annotated document. Any falsy value
            (``None`` or an empty string) falls back to
            ``/tmp/annotated.pdf``.

    Raises:
        TypeError: If no byte stream could be obtained from ``pdf``.
    """
    pdf_bytes = provide_byte_stream(pdf)
    if pdf_bytes is None:
        # Opening with ``stream=None`` would silently create a new, empty
        # document instead of failing, so reject unsupported input here.
        raise TypeError(
            f"Cannot read a PDF from {type(pdf).__name__!r}; expected bytes, str or Path"
        )

    # Resolve the destination up front; it does not depend on the document.
    output_path = output_path or "/tmp/annotated.pdf"

    with fitz.open(stream=pdf_bytes) as pdf_handle:
        for prediction in predictions:
            # FIXME: Adapt to line drawing
            index = prediction["page_idx"]
            annotate_page(pdf_handle[index], prediction)
        # Save while the document is still open.
        pdf_handle.save(output_path)

    logger.info(f"Annotated PDF saved to {output_path}")
|
||||
|
||||
|
||||
def annotate_page(page: fitz.Page, prediction):
    """Outline every box of ``prediction`` on ``page`` and caption it.

    Each entry of ``prediction["boxes"]`` must provide a ``"box"`` mapping
    with the keys ``x1``, ``y1``, ``x2``, ``y2`` as well as ``label``,
    ``probability`` and ``uuid``. The box is mirrored vertically against the
    page height before it is drawn.
    NOTE(review): presumably the predictions use a bottom-left origin while
    the page uses a top-left one -- confirm against the producer.

    Returns:
        The ``page`` that was passed in, after drawing on it.
    """
    read_corners = itemgetter("x1", "y1", "x2", "y2")
    read_caption_fields = itemgetter("label", "probability", "uuid")

    for entry in prediction["boxes"]:
        corners = read_corners(entry["box"])
        label, probability, uuid = read_caption_fields(entry)

        left, top, right, bottom = mirror_on_x_axis(corners, page.bound().height)
        page.draw_rect(fitz.Rect(left, top, right, bottom), color=(0, 0, 1), width=2)

        # The caption is anchored 5 units above the box's upper-left corner.
        caption = f"{label} ({probability:.2f}), {uuid}"
        page.insert_text((left, top - 5), caption, fontsize=12, color=(0.4, 0.4, 1))

    return page
|
||||
|
||||
|
||||
def mirror_on_x_axis(bbox, page_height):
    """Flip a box vertically within a page of height ``page_height``.

    Args:
        bbox: Four values ``(x0, y0, x1, y1)``.
        page_height: Height of the page the box lives on.

    Returns:
        The tuple ``(x0, page_height - y1, x1, page_height - y0)``; the
        horizontal coordinates are passed through unchanged.
    """
    left, lower, right, upper = bbox
    # The two vertical edges swap roles once they are measured from the
    # opposite side of the page.
    return left, page_height - upper, right, page_height - lower
|
||||
|
||||
|
||||
@singledispatch
def provide_byte_stream(pdf: Union[bytes, Path, str]) -> bytes:
    """Return the raw bytes of a PDF given either as bytes or as a file path.

    Dispatches on the type of ``pdf``: ``bytes`` are returned unchanged,
    while ``str`` and ``Path`` are treated as a file location whose content
    is read in binary mode.

    Raises:
        TypeError: If ``pdf`` has a type for which no handler is registered.
    """
    # Fallback for unregistered types: fail loudly instead of returning None,
    # which would contradict the declared ``bytes`` return type.
    raise TypeError(
        f"Cannot provide a byte stream for {type(pdf).__name__!r}; expected bytes, str or Path"
    )


@provide_byte_stream.register(bytes)
def _bytes_passthrough(pdf):
    # Already in memory: hand the bytes back unchanged.
    return pdf


@provide_byte_stream.register(str)
@provide_byte_stream.register(Path)
def _read_from_path(pdf):
    # File location: read the whole file in binary mode.
    with open(pdf, "rb") as pdf_file:
        return pdf_file.read()
|
||||
Loading…
x
Reference in New Issue
Block a user