Funktion: In Arbeit: Hinzufügen der Koordinatenkonvertierung für das Tragbare Dokumentenformat (PDF)
This commit is contained in:
parent
aefb73bf28
commit
8de913840f
@ -10,7 +10,7 @@ from pdf2img.default_objects.rectangle import RectanglePlus
|
||||
from cv_analysis.figure_detection.figure_detection import detect_figures
|
||||
from cv_analysis.table_inference import infer_lines
|
||||
from cv_analysis.table_parsing import parse_lines, parse_tables
|
||||
from cv_analysis.utils.image_extraction import extract_images_from_pdf
|
||||
from cv_analysis.utils.image_extraction import extract_images_from_pdf, transform_table_lines_by_page_info
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
|
||||
|
||||
@ -45,8 +45,9 @@ def make_image_analysis_pipeline(
|
||||
def analyse_pipeline(data: dict) -> Generator[dict, bytes, None]:
    """Analyse every image extracted from a PDF and yield one merged result per image.

    Args:
        data: Payload dict. ``data["pdf"]`` and ``data["vlp_output"]`` are
            passed unchanged to ``extract_images_from_pdf``.

    Yields:
        For each extracted image, its info dict merged (``|``) with the
        analysis result after that result went through
        ``transform_table_lines_by_page_info``.
    """
    pdf_bytes = data["pdf"]
    vlp_output = data["vlp_output"]
    images, info, page_info = extract_images_from_pdf(pdf_bytes, vlp_output)

    # NOTE(review): `analysis_fn` is not defined in this block; presumably it is
    # captured from the enclosing `make_image_analysis_pipeline` - confirm.
    # The analysis runs eagerly for all images before the first result is yielded.
    img_results = [analysis_fn(image) for image in images]
    img_results = [
        transform_table_lines_by_page_info(result, page)
        for result, page in zip(img_results, page_info)
    ]

    # The three lists are expected to be index-aligned, one entry per image.
    yield from (meta | result for meta, result in zip(info, img_results))
|
||||
|
||||
@ -1,26 +1,60 @@
|
||||
from dataclasses import dataclass
|
||||
from functools import partial
|
||||
from operator import itemgetter
|
||||
from typing import Iterable
|
||||
from typing import Tuple
|
||||
|
||||
import fitz
|
||||
import numpy as np
|
||||
from funcy import compose
|
||||
from numpy import ndarray as Array
|
||||
|
||||
|
||||
@dataclass
class PageInfo:
    """Page-level values needed to map image coordinates back to PDF coordinates.

    The rotation matrix, transformation matrix and dpi are the values that
    ``transform_image_coordinates_to_pdf_coordinates`` takes as arguments.
    """

    # Number of the page the values belong to.
    page_num: int
    # Rotation matrix of the page.
    rotation_matrix: fitz.Matrix
    # Transformation matrix of the page.
    transformation_matrix: fitz.Matrix
    # Resolution used when the page was rendered to an image.
    dpi: int
|
||||
|
||||
|
||||
def transform_image_coordinates_to_pdf_coordinates(
    bbox: Iterable[int | float],
    rotation_matrix: fitz.Matrix,
    transformation_matrix: fitz.Matrix,
    dpi: int | None = None,
) -> Tuple[float, float, float, float]:
    """Map a bounding box from image coordinates to PDF coordinates.

    Args:
        bbox: The four box coordinates in the order x1, y1, x2, y2.
        rotation_matrix: Matrix applied to the box first.
        transformation_matrix: Matrix applied to the box second.
        dpi: Resolution the coordinates were measured at. When given (and
            non-zero) every coordinate is rescaled to points via
            ``value / dpi * 72`` before the matrices are applied; otherwise
            the coordinates are used unchanged.

    Returns:
        The transformed box as ``(x0, y0, x1, y1)``.
    """
    if dpi:
        # Convert to points before any matrix is applied.
        x1, y1, x2, y2 = (value / dpi * 72 for value in bbox)
    else:
        x1, y1, x2, y2 = bbox

    # Each matrix is applied exactly once: rotation first, then transformation.
    rect = fitz.Rect(x1, y1, x2, y2) * rotation_matrix * transformation_matrix

    return rect.x0, rect.y0, rect.x1, rect.y1
|
||||
|
||||
|
||||
def extract_images_from_pdf(pdf_bytes: bytes, vlp_output: dict, dpi: int = 200) -> tuple[list[Array], dict]:
|
||||
def transform_table_lines_by_page_info(bboxes: dict, page_info: PageInfo) -> dict:
    """Convert the ``"tableLines"`` entries of ``bboxes`` to PDF coordinates.

    Each line is a dict with the keys ``x1``, ``y1``, ``x2`` and ``y2``; it is
    replaced by a new dict with the same keys holding the converted values.

    Args:
        bboxes: Analysis result. It is modified in place: ``"tableLines"`` is
            overwritten, or created as an empty list when the key is missing.
        page_info: Supplies the rotation matrix, transformation matrix and dpi
            used for the conversion.

    Returns:
        The same ``bboxes`` object that was passed in.
    """
    # FIXME: Also convert image info? Is image info necessary?
    # Also, the resulting lines are not in the table bbox, is this okay?
    corner_keys = ("x1", "y1", "x2", "y2")

    def to_pdf_line(line: dict) -> dict:
        converted = transform_image_coordinates_to_pdf_coordinates(
            tuple(line[key] for key in corner_keys),
            rotation_matrix=page_info.rotation_matrix,
            transformation_matrix=page_info.transformation_matrix,
            dpi=page_info.dpi,
        )
        return dict(zip(corner_keys, converted))

    bboxes["tableLines"] = [to_pdf_line(line) for line in bboxes.get("tableLines", [])]

    return bboxes
|
||||
|
||||
|
||||
def extract_images_from_pdf(
|
||||
pdf_bytes: bytes, vlp_output: dict, dpi: int = 200
|
||||
) -> tuple[list[Array], list[dict], list[PageInfo]]:
|
||||
with fitz.open(stream=pdf_bytes) as fh:
|
||||
images = []
|
||||
info = []
|
||||
table_images = []
|
||||
table_info = []
|
||||
page_info = []
|
||||
|
||||
vlp_output = vlp_output["data"] if isinstance(vlp_output, dict) else vlp_output
|
||||
|
||||
@ -30,17 +64,25 @@ def extract_images_from_pdf(pdf_bytes: bytes, vlp_output: dict, dpi: int = 200)
|
||||
boxes = filter(lambda box_obj: box_obj["label"] == "table", boxes)
|
||||
|
||||
page = fh[page_num] # pages[int(page_num)]
|
||||
# TODO: Workaround to be able to transform the image coordinates to pdf coordinates in a later step.
|
||||
current_page_info = PageInfo(page_num, page.rotation_matrix, page.transformation_matrix, dpi)
|
||||
|
||||
for box_obj in boxes:
|
||||
bbox = box_obj["box"]
|
||||
x1, y1, x2, y2 = itemgetter("x1", "y1", "x2", "y2")(bbox)
|
||||
rect = fitz.Rect((x1, y1), (x2, y2))
|
||||
rect = rect * page.transformation_matrix
|
||||
# FIXME: Check if de-rotation works as intended and is necessary at all.
|
||||
# Note that there exists also a derotation_matrix. If changing this, also change the
|
||||
# current_page_info object to include the derotation_matrix.
|
||||
rect = rect * page.transformation_matrix * page.rotation_matrix
|
||||
pixmap = page.get_pixmap(clip=rect, dpi=dpi, colorspace=fitz.csGRAY)
|
||||
shape = (pixmap.h, pixmap.w, pixmap.n) if pixmap.n > 1 else (pixmap.h, pixmap.w)
|
||||
image = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(*shape)
|
||||
|
||||
images.append(image)
|
||||
info.append({"pageNum": page_num, "bbox": bbox, "uuid": box_obj["uuid"], "label": box_obj["label"]})
|
||||
table_images.append(image)
|
||||
table_info.append(
|
||||
{"pageNum": page_num, "bbox": bbox, "uuid": box_obj["uuid"], "label": box_obj["label"]}
|
||||
)
|
||||
page_info.append(current_page_info)
|
||||
|
||||
return images, info
|
||||
return table_images, table_info, page_info
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user