funktion: In Arbeit: Hinzufügung der Koordinatenkonvertierung für das Tragbare Dokumentenformat (PDF)

This commit is contained in:
Julius Unverfehrt 2024-04-23 16:32:23 +02:00 committed by iriley
parent aefb73bf28
commit 8de913840f
2 changed files with 54 additions and 11 deletions

View File

@ -10,7 +10,7 @@ from pdf2img.default_objects.rectangle import RectanglePlus
from cv_analysis.figure_detection.figure_detection import detect_figures
from cv_analysis.table_inference import infer_lines
from cv_analysis.table_parsing import parse_lines, parse_tables
from cv_analysis.utils.image_extraction import extract_images_from_pdf
from cv_analysis.utils.image_extraction import extract_images_from_pdf, transform_table_lines_by_page_info
from cv_analysis.utils.structures import Rectangle
@ -45,8 +45,9 @@ def make_image_analysis_pipeline(
def analyse_pipeline(data: dict) -> Generator[dict, bytes, None]:
pdf_bytes = data["pdf"]
vlp_output = data["vlp_output"]
images, info = extract_images_from_pdf(pdf_bytes, vlp_output)
img_results = list(map(analysis_fn, images))
images, info, page_info = extract_images_from_pdf(pdf_bytes, vlp_output)
img_results = lmap(analysis_fn, images)
img_results = lmap(transform_table_lines_by_page_info, img_results, page_info)
results = map(lambda i: info[i] | img_results[i], range(len(info)))
yield from results

View File

@ -1,26 +1,60 @@
from dataclasses import dataclass
from functools import partial
from operator import itemgetter
from typing import Iterable
from typing import Tuple
import fitz
import numpy as np
from funcy import compose
from numpy import ndarray as Array
@dataclass
class PageInfo:
    """Per-page data kept so image coordinates can later be mapped to PDF coordinates."""

    # Index used to look the page up in the opened document.
    page_num: int
    # The page's ``rotation_matrix`` as reported by fitz.
    rotation_matrix: fitz.Matrix
    # The page's ``transformation_matrix`` as reported by fitz.
    transformation_matrix: fitz.Matrix
    # Resolution (dots per inch) at which the page regions were rendered to images.
    dpi: int
def transform_image_coordinates_to_pdf_coordinates(
    bbox: Iterable[int | float],
    rotation_matrix: fitz.Matrix,
    transformation_matrix: fitz.Matrix,
    dpi: int | None = None,
) -> Tuple:
    """Map a bounding box through the given rotation and transformation matrices.

    Args:
        bbox: Exactly four coordinates in the order ``x1, y1, x2, y2``.
        rotation_matrix: Matrix applied to the rectangle first.
        transformation_matrix: Matrix applied after ``rotation_matrix``.
        dpi: When given (and non-zero), every coordinate is first scaled from
            pixels at this resolution to points (``value / dpi * 72``). When
            omitted, the coordinates are used unchanged.

    Returns:
        The transformed rectangle as ``(x0, y0, x1, y1)``.

    Raises:
        ValueError: If ``bbox`` does not yield exactly four values.
    """
    if dpi:
        # Pixels -> points (72 points per inch) before any matrix is applied.
        x1, y1, x2, y2 = ((value / dpi) * 72 for value in bbox)
    else:
        x1, y1, x2, y2 = bbox

    rect = fitz.Rect(x1, y1, x2, y2)
    # Apply each matrix exactly once. ``Rect.transform`` works in place, so
    # chaining it and then multiplying by the same matrices again would run the
    # rectangle through the rotation and transformation twice.
    rect = rect * rotation_matrix * transformation_matrix
    return rect.x0, rect.y0, rect.x1, rect.y1
def extract_images_from_pdf(pdf_bytes: bytes, vlp_output: dict, dpi: int = 200) -> tuple[list[Array], dict]:
def transform_table_lines_by_page_info(bboxes: dict, page_info: PageInfo) -> dict:
    """Convert every entry of ``bboxes["tableLines"]`` using the page's matrices and dpi.

    The passed dictionary is modified in place and also returned. If it has no
    ``"tableLines"`` key, the key is created with an empty list.

    Args:
        bboxes: Analysis result; each item of its ``"tableLines"`` list must
            provide the keys ``x1``, ``y1``, ``x2`` and ``y2``.
        page_info: Supplies the rotation matrix, transformation matrix and dpi
            handed to ``transform_image_coordinates_to_pdf_coordinates``.

    Returns:
        The same ``bboxes`` object with its ``"tableLines"`` replaced.
    """
    # FIXME: Also convert image info? Is image info necessary?
    #  Also, the resulting lines are not in the table bbox, is this okay?
    rotation = page_info.rotation_matrix
    transformation = page_info.transformation_matrix
    resolution = page_info.dpi

    corner_keys = ("x1", "y1", "x2", "y2")
    read_corners = itemgetter(*corner_keys)

    def to_pdf_space(line: dict) -> dict:
        # Pull the four corners out, convert them, and rebuild the same dict shape.
        converted = transform_image_coordinates_to_pdf_coordinates(
            read_corners(line),
            rotation_matrix=rotation,
            transformation_matrix=transformation,
            dpi=resolution,
        )
        return {
            "x1": converted[0],
            "y1": converted[1],
            "x2": converted[2],
            "y2": converted[3],
        }

    bboxes["tableLines"] = [to_pdf_space(line) for line in bboxes.get("tableLines", [])]
    return bboxes
def extract_images_from_pdf(
pdf_bytes: bytes, vlp_output: dict, dpi: int = 200
) -> tuple[list[Array], list[dict], list[PageInfo]]:
with fitz.open(stream=pdf_bytes) as fh:
images = []
info = []
table_images = []
table_info = []
page_info = []
vlp_output = vlp_output["data"] if isinstance(vlp_output, dict) else vlp_output
@ -30,17 +64,25 @@ def extract_images_from_pdf(pdf_bytes: bytes, vlp_output: dict, dpi: int = 200)
boxes = filter(lambda box_obj: box_obj["label"] == "table", boxes)
page = fh[page_num] # pages[int(page_num)]
# TODO: Workaround to be able to transform the image coordinates to pdf coordinates in a later step.
current_page_info = PageInfo(page_num, page.rotation_matrix, page.transformation_matrix, dpi)
for box_obj in boxes:
bbox = box_obj["box"]
x1, y1, x2, y2 = itemgetter("x1", "y1", "x2", "y2")(bbox)
rect = fitz.Rect((x1, y1), (x2, y2))
rect = rect * page.transformation_matrix
# FIXME: Check if de-rotation works as intended and is necessary at all.
# Note that there exists also a derotation_matrix. If changing this, also change the
# current_page_info object to include the derotation_matrix.
rect = rect * page.transformation_matrix * page.rotation_matrix
pixmap = page.get_pixmap(clip=rect, dpi=dpi, colorspace=fitz.csGRAY)
shape = (pixmap.h, pixmap.w, pixmap.n) if pixmap.n > 1 else (pixmap.h, pixmap.w)
image = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(*shape)
images.append(image)
info.append({"pageNum": page_num, "bbox": bbox, "uuid": box_obj["uuid"], "label": box_obj["label"]})
table_images.append(image)
table_info.append(
{"pageNum": page_num, "bbox": bbox, "uuid": box_obj["uuid"], "label": box_obj["label"]}
)
page_info.append(current_page_info)
return images, info
return table_images, table_info, page_info