100 lines
2.3 KiB
Python
100 lines
2.3 KiB
Python
"""Defines utilities for PDF processing."""
|
|
|
|
import json
|
|
from operator import itemgetter
|
|
|
|
from PDFNetPython3.PDFNetPython import (
|
|
PDFDoc,
|
|
PDFNet,
|
|
Square,
|
|
Rect,
|
|
ColorPt,
|
|
BorderStyle,
|
|
SDFDoc,
|
|
Point,
|
|
Text,
|
|
)
|
|
|
|
from image_prediction.utils import get_logger
|
|
|
|
logger = get_logger()
|
|
|
|
|
|
def annotate_image(doc, image_info):
    """Mark one image on its PDF page with a dashed box and a JSON note.

    A square annotation is drawn over the rectangle given by the image
    position, and a text annotation holding the JSON dump of ``image_info``
    is anchored at the rectangle's (x1, y1) corner. Both annotations share
    one colour:

    * green if ``image_info["filters"]["allPassed"]`` is truthy,
    * red if ``image_info["filters"]["probability"]["unconfident"]`` is truthy,
    * blue otherwise.

    The document is modified in place; saving it is left to the caller.

    Args:
        doc: The PDF document object to annotate.
        image_info: Mapping with a ``filters`` entry (see above) and a
            ``position`` entry providing ``pageNumber``, ``x1``, ``y1``,
            ``x2`` and ``y2``.
    """
    # Pick the annotation colour (RGB components) from the filter outcome.
    filters = image_info["filters"]
    if filters["allPassed"]:
        rgb = (0, 1, 0)
    elif filters["probability"]["unconfident"]:
        rgb = (1, 0, 0)
    else:
        rgb = (0, 0, 1)

    position = image_info["position"]
    target_page = doc.GetPage(position["pageNumber"])
    corners = tuple(position[key] for key in ("x1", "y1", "x2", "y2"))

    # Dashed rectangle around the image.
    box = Square.Create(doc.GetSDFDoc(), Rect(*corners))
    box.SetColor(ColorPt(*rgb), 3)
    box.SetBorderStyle(BorderStyle(BorderStyle.e_dashed, 2, 0, 0, [4, 2]))
    box.SetPadding(4)
    box.RefreshAppearance()
    target_page.AnnotPushBack(box)

    # Text note carrying the full image info, anchored at (x1, y1).
    note = Text.Create(doc.GetSDFDoc(), Point(*corners[:2]))
    note.SetContents(json.dumps(image_info, indent=2, ensure_ascii=False))
    note.SetColor(ColorPt(*rgb))
    target_page.AnnotPushBack(note)
    note.RefreshAppearance()
|
|
|
|
|
|
def init(license_key=None):
    """Initialize the PDFNet library with a license key.

    Args:
        license_key: Optional license key passed to ``PDFNet.Initialize``.
            When omitted or ``None``, the key embedded below is used, so a
            bare ``init()`` call behaves exactly as before.
    """
    # NOTE(review): the default key is committed in source; consider
    # supplying it from configuration via the ``license_key`` argument.
    if license_key is None:
        license_key = "Knecon AG(en.knecon.swiss):OEM:DDA-R::WL+:AMS(20211029):BECC974307DAB4F34B513BC9B2531B24496F6FCB83CD8AC574358A959730B622FABEF5C7"
    PDFNet.Initialize(license_key)
|
|
|
|
|
|
def draw_metadata_box(pdf_path, metadata, store_path):
    """Draw a red dashed box on page 1 of a PDF and save the result.

    Args:
        pdf_path: Path of the PDF to open.
        metadata: Mapping providing the rectangle corners under the keys
            ``x1``, ``y1``, ``x2`` and ``y2``.
        store_path: Path the annotated PDF is written to.

    Raises:
        KeyError: If ``metadata`` lacks one of the corner keys.
    """
    init()

    doc = PDFDoc(pdf_path)
    try:
        # Routed through the module logger instead of a bare print so the
        # output follows the logging configuration.
        logger.debug(f"Drawing metadata box for {metadata}")

        color = (1, 0, 0)
        coords = itemgetter("x1", "y1", "x2", "y2")(metadata)
        page = doc.GetPage(1)

        sq = Square.Create(doc.GetSDFDoc(), Rect(*coords))
        sq.SetColor(ColorPt(*color), 3)
        sq.SetBorderStyle(BorderStyle(BorderStyle.e_dashed, 2, 0, 0, [4, 2]))
        sq.SetPadding(4)
        sq.RefreshAppearance()
        page.AnnotPushBack(sq)

        doc.Save(store_path, SDFDoc.e_linearized)
    finally:
        # Release the document even when annotating or saving fails.
        doc.Close()

    logger.info(f"Saved annotated PDF to {store_path}")
|
|
|
|
|
|
def annotate_pdf(pdf_path, responses, store_path):
    """Annotate every image described in ``responses`` and save the PDF.

    Args:
        pdf_path: Path of the PDF to open.
        responses: Iterable of image-info mappings; each one is passed to
            ``annotate_image`` together with the opened document.
        store_path: Path the annotated PDF is written to.
    """
    init()

    doc = PDFDoc(pdf_path)
    try:
        for image_info in responses:
            annotate_image(doc, image_info)

        doc.Save(store_path, SDFDoc.e_linearized)
    finally:
        # Release the document even when annotating or saving fails.
        doc.Close()

    logger.info(f"Saved annotated PDF to {store_path}")
|