From b33dcd83a5940e33922a161807a18ea033f587cf Mon Sep 17 00:00:00 2001
From: Isaac Riley
Date: Fri, 29 Jul 2022 08:50:06 +0200
Subject: [PATCH] Revert "Pull request #21: move rotation logic to before cv-analysis, so that cv-analysis only needs to operate on portrait images and matrix rotation logic can be dropped"

This reverts commit de921e308f7e0c6d5686b14ca132910bce0bad17.
---
Review notes (not part of the original commit; this section is ignored when the patch is applied):
* Line breaks and indentation were reconstructed from a whitespace-collapsed copy of this
  patch, so context-line whitespace is inferred; apply with `git apply --ignore-whitespace`
  if it does not match.
* rotate.py: import itemgetter from the public `operator` module instead of the private
  `_operator` accelerator module.
* rotate.py: for rotation == 2 the back shift now adds the page width to the x row and the
  page height to the y row. The previous `np.array([[1, 1], [1, 1]]) * size` broadcast
  `size` across the columns (points) instead of the rows (coordinates), which is only
  correct for square pages.
* rotate.py: filled in the empty `(, )` in the `size` docstring entry as `(width, height)`,
  matching how rotate_rectangle calls rotate_and_shift.
* The blob id on the rotate.py `index` line predates these edits.

 cv_analysis/server/pipeline.py |   5 ++
 cv_analysis/server/rotate.py   | 107 +++++++++++++++++++++++++++++++++
 cv_analysis/utils/pdf2image.py |   1 -
 3 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 cv_analysis/server/rotate.py

diff --git a/cv_analysis/server/pipeline.py b/cv_analysis/server/pipeline.py
index e611914..44c346e 100644
--- a/cv_analysis/server/pipeline.py
+++ b/cv_analysis/server/pipeline.py
@@ -5,6 +5,7 @@ from funcy import lmap
 
 from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
 from cv_analysis.layout_parsing import parse_layout
+from cv_analysis.server.rotate import rotate_rectangle
 from cv_analysis.table_parsing import parse_tables
 from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs
 from cv_analysis.utils.structures import Rectangle
@@ -18,6 +19,7 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
         Convert PDF to pairs of image and page information
         Analyse pages, get list of bounding boxes per page (e.g. table cells)
         Convert pixel values to inches
+        Rotate results if page is rotated
         Format results to stream of dictionaries with page information and analysis results
     """
 
@@ -30,6 +32,9 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
     def image_metadata_pair_to_results(image_metadata_pair):
         rectangles = analysis_fn(image_metadata_pair.image)
         rectangles = map(partial(pixel_rect_to_inches_rect, dpi=dpi), rectangles)
+        if image_metadata_pair.metadata["rotation"] != 0:
+            rotate_rectangle_fn = partial(rotate_rectangle, metadata=image_metadata_pair.metadata)
+            rectangles = map(rotate_rectangle_fn, rectangles)
         bboxes = lmap(lambda x: x.json_xyxy(), rectangles)
         return {**image_metadata_pair.metadata, "bboxes": bboxes}
 
diff --git a/cv_analysis/server/rotate.py b/cv_analysis/server/rotate.py
new file mode 100644
index 0000000..ec9a867
--- /dev/null
+++ b/cv_analysis/server/rotate.py
@@ -0,0 +1,107 @@
+from operator import itemgetter
+
+import numpy as np
+
+from cv_analysis.utils.structures import Rectangle
+
+
+def rotate_rectangle(rectangle, metadata):
+    width, height, rotation = itemgetter("width", "height", "rotation")(metadata)
+    rotation = rotation // 90 if rotation not in [0, 1, 2, 3] else rotation
+
+    if rotation in [1, 3]:
+        width, height = height, width
+
+    x1, y1, x2, y2 = rectangle.xyxy()
+    matrix = np.vstack([[x1, y1], [x2, y2]]).T
+    new_matrix = rotate_and_shift(matrix, rotation, (width, height))
+
+    x1, x2 = sorted(new_matrix[0, :])
+    y1, y2 = sorted(new_matrix[1, :])
+
+    return Rectangle.from_xyxy((x1, y1, x2, y2), discrete=False)
+
+
+def rotate_and_shift(matrix, rotation, size, debug=False):
+    """Rotates a matrix against (!) a specified rotation. That is, the rotation is applied negatively. The matrix is
+    also shifted to ensure it contains points (columns) in quadrant I.
+
+    Procedure:
+    1) Rotate the matrix clockwise according to rotation value
+    2) Shift the matrix back into quadrant I
+    3) Set x_i and y_i to new lower left and upper right corners, since the corner vectors are no longer at these
+       corners due to the rotation
+
+    Args:
+        matrix: matrix to transform
+        rotation: any of 0, 1, 2, or 3, where 1 = 90 degree CLOCKWISE rotation etc.
+        size: the size of the page as a tuple (width, height)
+        debug: Visualizes the transformations for later re-understanding of the code
+    """
+
+    def shift_to_quadrant_1(matrix):
+
+        # TODO: generalize
+        if rotation == 0:
+            back_shift = np.zeros_like(np.eye(2))
+        elif rotation == 1:
+            back_shift = np.array([[0, 0], [1, 1]]) * size[1]
+        elif rotation == 2:
+            back_shift = np.array([[size[0], size[0]], [size[1], size[1]]])
+        elif rotation == 3:
+            back_shift = np.array([[1, 1], [0, 0]]) * size[0]
+        else:
+            raise ValueError(f"Unexpected rotation value '{rotation}'. Expected any of 0, 1, 2, or 3.")
+
+        matrix_shifted = matrix + back_shift
+        return matrix_shifted
+
+    # PDF rotations are clockwise, hence subtract the radian value of the rotation from 2 pi
+    radians = (2 * np.pi) - (np.pi * (rotation / 2))
+    matrix_rotated = rotate(matrix, radians)
+    matrix_rotated_and_shifted = shift_to_quadrant_1(matrix_rotated)
+
+    if debug:
+        __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted)
+    return matrix_rotated_and_shifted
+
+
+def __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted):
+
+    import matplotlib.pyplot as plt
+    from copy import deepcopy
+
+    m1 = matrix
+    m2 = matrix_rotated
+    m3 = matrix_rotated_and_shifted
+
+    m1, m2, m3 = map(deepcopy, (m1, m2, m3))
+
+    frame = np.eye(2) * size
+    frame_rotated = rotate(frame, radians)
+
+    f1 = frame
+    f2 = frame_rotated
+
+    f1 *= 0.005 * 1
+    f2 *= 0.005 * 1
+    m1 *= 0.005 * 1
+    m2 *= 0.005 * 1
+    m3 *= 0.005 * 1
+
+    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
+    axes = axes.ravel()
+
+    axes[0].quiver([0, 0], [0, 0], f1[0, :], f1[1, :], scale=5, scale_units="inches", color="red")
+    axes[1].quiver([0, 0], [0, 0], f2[0, :], f2[1, :], scale=5, scale_units="inches", color="red")
+    axes[0].quiver([0, 0], [0, 0], m1[0, :], m1[1, :], scale=5, scale_units="inches")
+    axes[1].quiver([0, 0], [0, 0], m2[0, :], m2[1, :], scale=5, scale_units="inches", color="green")
+    axes[1].quiver([0, 0], [0, 0], m3[0, :], m3[1, :], scale=5, scale_units="inches", color="blue")
+
+    plt.show()
+
+
+def rotate(input_matrix, radians):
+    rotation_matrix = np.vstack([[np.cos(radians), -np.sin(radians)], [np.sin(radians), np.cos(radians)]])
+
+    return np.dot(rotation_matrix, input_matrix)
diff --git a/cv_analysis/utils/pdf2image.py b/cv_analysis/utils/pdf2image.py
index 1be9a5c..a26b003 100644
--- a/cv_analysis/utils/pdf2image.py
+++ b/cv_analysis/utils/pdf2image.py
@@ -22,7 +22,6 @@ def pdf_to_image_metadata_pairs(pdf: bytes, index=None, dpi=200) -> Iterator[Ima
 
 def page_to_image_metadata_pair(page: fitz.Page, dpi):
     metadata = get_page_info(page)
-    page.set_rotation(0)
     pixmap = page.get_pixmap(dpi=dpi)
     array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
 