cv_analysis.utils.image_extraction module

class cv_analysis.utils.image_extraction.PageInfo(page_num: int, rotation_matrix: Matrix, transformation_matrix: Matrix, dpi: int, width: int | float, height: int | float, image_width: int | float, image_height: int | float, rotation: int) -> None

Bases: object

dpi: int
height: int | float
image_height: int | float
image_width: int | float
page_num: int
rotation: int
rotation_matrix: Matrix
transformation_matrix: Matrix
width: int | float
cv_analysis.utils.image_extraction.extract_images_from_pdf(pdf_bytes: bytes, vlp_output: dict, dpi: int = 200) -> tuple[list[ndarray], list[dict], list[PageInfo]]
Return type:

tuple[list[ndarray], list[dict], list[PageInfo]]

cv_analysis.utils.image_extraction.rescale_to_pdf(bbox: tuple[int | float, int | float, int | float, int | float], page_info: PageInfo) -> tuple[float, float, float, float]
Return type:

tuple[float, float, float, float]

cv_analysis.utils.image_extraction.transform_image_coordinates_to_pdf_coordinates(bbox: tuple[int | float, int | float, int | float, int | float], rotation_matrix: Matrix, transformation_matrix: Matrix, dpi: int | None = None) -> Tuple
Return type:

Tuple

cv_analysis.utils.image_extraction.transform_table_lines_by_page_info(bboxes: dict, offsets: tuple, page_info: PageInfo) -> dict
Return type:

dict