Revert "Pull request #21: move rotation logic to before cv-analysis, so that cv-analysis only needs to operate on portrait images and matrix rotation logic can be dropped"

This reverts commit de921e308f7e0c6d5686b14ca132910bce0bad17.
This commit is contained in:
Isaac Riley 2022-07-29 08:50:06 +02:00
parent de921e308f
commit b33dcd83a5
3 changed files with 112 additions and 1 deletions

View File

@ -5,6 +5,7 @@ from funcy import lmap
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
from cv_analysis.layout_parsing import parse_layout
from cv_analysis.server.rotate import rotate_rectangle
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs
from cv_analysis.utils.structures import Rectangle
@ -18,6 +19,7 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
Convert PDF to pairs of image and page information
Analyse pages, get list of bounding boxes per page (e.g. table cells)
Convert pixel values to inches
Rotate results if page is rotated
Format results to stream of dictionaries with page information and analysis results
"""
@ -30,6 +32,9 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
def image_metadata_pair_to_results(image_metadata_pair):
    """Analyse one page image and return its metadata merged with the detected bounding boxes.

    The rectangles returned by ``analysis_fn`` are converted from pixels to inches and, when
    the page metadata reports a non-zero rotation, passed through ``rotate_rectangle``.
    The result is the page metadata dictionary extended by a ``"bboxes"`` list.
    """
    detected = analysis_fn(image_metadata_pair.image)
    to_inches = partial(pixel_rect_to_inches_rect, dpi=dpi)
    rectangles = map(to_inches, detected)

    page_info = image_metadata_pair.metadata
    if page_info["rotation"] != 0:
        # Bring the boxes into the coordinate frame of the rotated page.
        rectangles = map(partial(rotate_rectangle, metadata=page_info), rectangles)

    bboxes = [rect.json_xyxy() for rect in rectangles]
    return {**page_info, "bboxes": bboxes}

View File

@ -0,0 +1,107 @@
from _operator import itemgetter
import numpy as np
from cv_analysis.utils.structures import Rectangle
def rotate_rectangle(rectangle, metadata):
    """Transform a rectangle according to the rotation recorded in the page metadata.

    Args:
        rectangle: object exposing ``xyxy()`` which yields ``(x1, y1, x2, y2)``.
        metadata: mapping with the keys ``"width"``, ``"height"`` and ``"rotation"``.
            ``rotation`` may be given as a number of quarter turns (0, 1, 2, 3) or in
            degrees; degree values are floor-divided by 90 and wrapped modulo 4, so
            e.g. 360 and -90 are handled like 0 and 270.
            NOTE(review): width/height are presumably in the same unit as the rectangle
            coordinates - confirm against the caller.

    Returns:
        A new ``Rectangle`` built via ``Rectangle.from_xyxy(..., discrete=False)`` whose
        corners are ordered so that ``x1 <= x2`` and ``y1 <= y2``.
    """
    width, height, rotation = itemgetter("width", "height", "rotation")(metadata)

    if rotation not in (0, 1, 2, 3):
        # Degrees -> quarter turns; the modulo keeps full turns and negative angles
        # inside the 0..3 range that rotate_and_shift accepts.
        rotation = (rotation // 90) % 4

    if rotation in (1, 3):
        # For odd quarter turns the page dimensions are handed over swapped.
        width, height = height, width

    x1, y1, x2, y2 = rectangle.xyxy()
    # One column per corner: row 0 holds the x values, row 1 the y values.
    corners = np.array([[x1, x2], [y1, y2]])
    rotated = rotate_and_shift(corners, rotation, (width, height))

    # After the rotation the two columns are no longer the lower-left / upper-right
    # corners, so re-establish the ordering per axis.
    x1, x2 = sorted(rotated[0, :])
    y1, y2 = sorted(rotated[1, :])
    return Rectangle.from_xyxy((x1, y1, x2, y2), discrete=False)
def rotate_and_shift(matrix, rotation, size, debug=False):
    """Rotate points by a number of clockwise quarter turns and shift them back into quadrant I.

    Procedure:
        1) Rotate the matrix clockwise according to the rotation value
        2) Shift the matrix back into quadrant I

    Up to floating point noise, a point ``(x, y)`` is mapped to

        rotation 0: (x, y)
        rotation 1: (y, size[1] - x)
        rotation 2: (size[0] - x, size[1] - y)
        rotation 3: (size[0] - y, x)

    The columns of the result are in general no longer lower-left / upper-right corners;
    re-ordering them is left to the caller.

    Args:
        matrix: 2 x N matrix to transform, one point per column (row 0 = x, row 1 = y)
        rotation: any of 0, 1, 2, or 3, where 1 = 90 degree CLOCKWISE rotation etc.
        size: the size of the page as a tuple (<width>, <height>)
        debug: Visualizes the transformations for later re-understanding of the code

    Returns:
        The rotated and shifted matrix, same shape as ``matrix``.

    Raises:
        ValueError: if ``rotation`` is not one of 0, 1, 2, or 3.
    """

    def shift_to_quadrant_1(rotated):
        # The offset is the same for every point, so it is expressed as a single
        # (dx, dy) pair and added as a column vector. This applies dx to the whole
        # x row and dy to the whole y row regardless of the number of columns.
        if rotation == 0:
            offset = (0, 0)
        elif rotation == 1:
            offset = (0, size[1])
        elif rotation == 2:
            # Multiplying a matrix of ones by the `size` tuple would broadcast
            # (width, height) along the rows and shift each *point* by a different
            # amount; x must be shifted by the width and y by the height.
            offset = (size[0], size[1])
        elif rotation == 3:
            offset = (size[0], 0)
        else:
            raise ValueError(f"Unexpected rotation value '{rotation}'. Expected any of 0, 1, 2, or 3.")
        dx, dy = offset
        return rotated + np.array([[dx], [dy]])

    # PDF rotations are clockwise, hence subtract the radian value of the rotation from 2 pi
    radians = (2 * np.pi) - (np.pi * (rotation / 2))
    matrix_rotated = rotate(matrix, radians)
    matrix_rotated_and_shifted = shift_to_quadrant_1(matrix_rotated)

    if debug:
        __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted)

    return matrix_rotated_and_shifted
def __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted):
    """Debug helper: plot the page frame and the point vectors before and after transformation.

    The left axes show the unrotated frame (red) and the input vectors (default colour).
    The right axes show the rotated frame (red), the rotated vectors (green) and the
    rotated-and-shifted vectors (blue). All inputs are left unmodified.

    Args:
        size: page size as (<width>, <height>), used to build the frame vectors
        radians: angle used to rotate the frame
        matrix: original 2 x 2 matrix (one vector per column)
        matrix_rotated: ``matrix`` after rotation
        matrix_rotated_and_shifted: ``matrix`` after rotation and shift
    """
    import matplotlib.pyplot as plt

    # Shrink everything by a fixed factor before drawing.
    display_scale = 0.005

    frame = np.eye(2) * size
    frame_rotated = rotate(frame, radians)

    # Scale out-of-place: an in-place `*=` with a float fails on integer arrays, and
    # creating new arrays also guarantees the caller's matrices are never touched.
    f1, f2, m1, m2, m3 = (
        np.asarray(vectors) * display_scale
        for vectors in (frame, frame_rotated, matrix, matrix_rotated, matrix_rotated_and_shifted)
    )

    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes = axes.ravel()
    origin = [0, 0]

    axes[0].quiver(origin, origin, f1[0, :], f1[1, :], scale=5, scale_units="inches", color="red")
    axes[1].quiver(origin, origin, f2[0, :], f2[1, :], scale=5, scale_units="inches", color="red")
    axes[0].quiver(origin, origin, m1[0, :], m1[1, :], scale=5, scale_units="inches")
    axes[1].quiver(origin, origin, m2[0, :], m2[1, :], scale=5, scale_units="inches", color="green")
    axes[1].quiver(origin, origin, m3[0, :], m3[1, :], scale=5, scale_units="inches", color="blue")

    plt.show()
def rotate(input_matrix, radians):
    """Left-multiply ``input_matrix`` by the 2 x 2 rotation matrix for the angle ``radians``.

    The rotation matrix is ``[[cos, -sin], [sin, cos]]``, i.e. positive angles rotate
    column vectors counter-clockwise.
    """
    cosine, sine = np.cos(radians), np.sin(radians)
    rotation_matrix = np.array([[cosine, -sine], [sine, cosine]])
    return rotation_matrix.dot(input_matrix)

View File

@ -22,7 +22,6 @@ def pdf_to_image_metadata_pairs(pdf: bytes, index=None, dpi=200) -> Iterator[Ima
def page_to_image_metadata_pair(page: fitz.Page, dpi):
metadata = get_page_info(page)
page.set_rotation(0)
pixmap = page.get_pixmap(dpi=dpi)
array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)