Revert "Pull request #21: move rotation logic to before cv-analysis, so that cv-analysis only needs to operate on portrait images and matrix rotation logic can be dropped"
This reverts commit de921e308f7e0c6d5686b14ca132910bce0bad17.
This commit is contained in:
parent
de921e308f
commit
b33dcd83a5
@ -5,6 +5,7 @@ from funcy import lmap
|
||||
|
||||
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
from cv_analysis.server.rotate import rotate_rectangle
|
||||
from cv_analysis.table_parsing import parse_tables
|
||||
from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
@ -18,6 +19,7 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
|
||||
Convert PDF to pairs of image and page information
|
||||
Analyse pages, get list of bounding boxes per page (e.g. table cells)
|
||||
Convert pixel values to inches
|
||||
Rotate results if page is rotated
|
||||
Format results to stream of dictionaries with page information and analysis results
|
||||
"""
|
||||
|
||||
@ -30,6 +32,9 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
|
||||
def image_metadata_pair_to_results(image_metadata_pair):
    """Analyse one page image and merge the resulting boxes into the page metadata.

    The rectangles returned by ``analysis_fn`` are converted from pixels to inches, rotated
    if the page metadata reports a non-zero rotation, and serialised with ``json_xyxy()``.

    Returns:
        A new dict holding every entry of the page metadata plus a "bboxes" list.
    """
    found = analysis_fn(image_metadata_pair.image)
    to_inches = partial(pixel_rect_to_inches_rect, dpi=dpi)
    in_inches = map(to_inches, found)

    page_is_rotated = image_metadata_pair.metadata["rotation"] != 0
    if page_is_rotated:
        undo_rotation = partial(rotate_rectangle, metadata=image_metadata_pair.metadata)
        in_inches = map(undo_rotation, in_inches)

    # lmap consumes the lazy map chain built above and materialises the list.
    bboxes = lmap(lambda rect: rect.json_xyxy(), in_inches)
    return {**image_metadata_pair.metadata, "bboxes": bboxes}
|
||||
|
||||
|
||||
107
cv_analysis/server/rotate.py
Normal file
107
cv_analysis/server/rotate.py
Normal file
@ -0,0 +1,107 @@
|
||||
from _operator import itemgetter
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
|
||||
|
||||
def rotate_rectangle(rectangle, metadata):
    """Transform a rectangle according to the rotation recorded in the page metadata.

    Args:
        rectangle: object whose ``xyxy()`` returns ``(x1, y1, x2, y2)``.
        metadata: mapping with the keys "width", "height" and "rotation". A rotation of
            0, 1, 2 or 3 is taken as a number of quarter turns; any other value is taken
            as degrees and floor-divided by 90. Width and height are presumably in the
            same unit as the rectangle coordinates -- confirm against the caller.

    Returns:
        A ``Rectangle`` built with ``Rectangle.from_xyxy(..., discrete=False)`` whose
        corners are ordered so that x1 <= x2 and y1 <= y2.
    """
    width, height, rotation = itemgetter("width", "height", "rotation")(metadata)

    if rotation not in (0, 1, 2, 3):
        # Degrees -> quarter turns. Wrapping modulo a full turn makes values such as 360
        # or -90 map to 0 and 3 instead of being rejected by rotate_and_shift.
        rotation = (rotation // 90) % 4

    # For odd quarter turns the page extent is passed with width and height exchanged.
    if rotation in (1, 3):
        width, height = height, width

    x1, y1, x2, y2 = rectangle.xyxy()
    # Columns are the two corner points; row 0 holds x values, row 1 holds y values.
    matrix = np.vstack([[x1, y1], [x2, y2]]).T
    new_matrix = rotate_and_shift(matrix, rotation, (width, height))

    # After rotating, the two points are no longer necessarily the lower-left and
    # upper-right corners, so re-derive the corner ordering per axis.
    x1, x2 = sorted(new_matrix[0, :])
    y1, y2 = sorted(new_matrix[1, :])

    return Rectangle.from_xyxy((x1, y1, x2, y2), discrete=False)
|
||||
|
||||
|
||||
def rotate_and_shift(matrix, rotation, size, debug=False):
    """Rotate a matrix of column vectors by quarter turns and shift it back into quadrant I.

    Procedure:
        1) Multiply the matrix by the 2-D rotation matrix for the angle
           ``2 * pi - rotation * pi / 2``, i.e. apply the (clockwise) rotation value as a
           negative angle.
        2) Add the page extent to each coordinate row that the rotation made negative, so
           that the points lie in quadrant I again.

    The columns of the result are the transformed input points in their original order;
    re-deriving lower-left / upper-right corners is left to the caller.

    Args:
        matrix: 2 x N array whose columns are points (row 0 = x, row 1 = y).
        rotation: any of 0, 1, 2, or 3, where 1 = 90 degree CLOCKWISE rotation etc.
        size: the size of the page as a tuple (<width>, <height>)
        debug: Visualizes the transformations for later re-understanding of the code

    Returns:
        The rotated and shifted matrix, same shape as ``matrix``.

    Raises:
        ValueError: if ``rotation`` is not one of 0, 1, 2 or 3.
    """

    def shift_to_quadrant_1(rotated):
        # Per-axis offset (x, y) that moves the rotated page back into quadrant I.
        if rotation == 0:
            offset = (0, 0)
        elif rotation == 1:
            offset = (0, size[1])
        elif rotation == 2:
            offset = (size[0], size[1])
        elif rotation == 3:
            offset = (size[0], 0)
        else:
            raise ValueError(f"Unexpected rotation value '{rotation}'. Expected any of 0, 1, 2, or 3.")

        # A (2, 1) column broadcasts over every point: row 0 (x) receives the x offset and
        # row 1 (y) the y offset, independent of the number of columns. Multiplying a 2x2
        # matrix of ones by `size` would instead spread width/height across the columns.
        back_shift = np.array(offset).reshape(2, 1)
        return rotated + back_shift

    # PDF rotations are clockwise, hence subtract the radian value of the rotation from 2 pi
    radians = (2 * np.pi) - (np.pi * (rotation / 2))
    matrix_rotated = rotate(matrix, radians)
    matrix_rotated_and_shifted = shift_to_quadrant_1(matrix_rotated)

    if debug:
        __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted)

    return matrix_rotated_and_shifted
|
||||
|
||||
|
||||
def __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted):
    """Plot the page frame and the matrix before and after the transformation (debug aid).

    Left axes: the unrotated frame (red) and the input matrix (default colour).
    Right axes: the rotated frame (red), the rotated matrix (green) and the rotated and
    shifted matrix (blue). Every column is drawn as an arrow starting at the origin; two
    arrow origins are supplied, so each matrix is expected to have exactly two columns.

    Args:
        size: page size as (<width>, <height>); spans the frame ``np.eye(2) * size``.
        radians: angle used to rotate the frame via ``rotate``.
        matrix: the untransformed matrix.
        matrix_rotated: the matrix after rotation.
        matrix_rotated_and_shifted: the matrix after rotation and shift.
    """
    import matplotlib.pyplot as plt

    # Uniform shrink factor applied to everything that is drawn.
    display_scale = 0.005

    frame = np.eye(2) * size
    frame_rotated = rotate(frame, radians)

    # Scale out of place into float arrays: this leaves the caller's arrays untouched and,
    # unlike an in-place `*=`, also works when an input matrix has an integer dtype.
    f1, f2, m1, m2, m3 = (
        np.asarray(item, dtype=float) * display_scale
        for item in (frame, frame_rotated, matrix, matrix_rotated, matrix_rotated_and_shifted)
    )

    _, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes = axes.ravel()

    origin = [0, 0]
    axes[0].quiver(origin, origin, f1[0, :], f1[1, :], scale=5, scale_units="inches", color="red")
    axes[1].quiver(origin, origin, f2[0, :], f2[1, :], scale=5, scale_units="inches", color="red")
    axes[0].quiver(origin, origin, m1[0, :], m1[1, :], scale=5, scale_units="inches")
    axes[1].quiver(origin, origin, m2[0, :], m2[1, :], scale=5, scale_units="inches", color="green")
    axes[1].quiver(origin, origin, m3[0, :], m3[1, :], scale=5, scale_units="inches", color="blue")

    plt.show()
|
||||
|
||||
|
||||
def rotate(input_matrix, radians):
    """Left-multiply ``input_matrix`` by the 2-D rotation matrix for the angle ``radians``.

    The rotation matrix is ``[[cos, -sin], [sin, cos]]``; the columns of ``input_matrix``
    are the vectors being rotated.
    """
    cosine = np.cos(radians)
    sine = np.sin(radians)

    top_row = [cosine, -sine]
    bottom_row = [sine, cosine]
    rotation_matrix = np.vstack([top_row, bottom_row])

    return rotation_matrix.dot(input_matrix)
|
||||
@ -22,7 +22,6 @@ def pdf_to_image_metadata_pairs(pdf: bytes, index=None, dpi=200) -> Iterator[Ima
|
||||
|
||||
def page_to_image_metadata_pair(page: fitz.Page, dpi):
|
||||
metadata = get_page_info(page)
|
||||
page.set_rotation(0)
|
||||
pixmap = page.get_pixmap(dpi=dpi)
|
||||
array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user