From b33dcd83a5940e33922a161807a18ea033f587cf Mon Sep 17 00:00:00 2001
From: Isaac Riley <Isaac.Riley@iqser.com>
Date: Fri, 29 Jul 2022 08:50:06 +0200
Subject: [PATCH] Revert "Pull request #21: move rotation logic to before
 cv-analysis, so that cv-analysis only needs to operate on portrait images and
 matrix rotation logic can be dropped"

This reverts commit de921e308f7e0c6d5686b14ca132910bce0bad17.
---
 cv_analysis/server/pipeline.py |   5 ++
 cv_analysis/server/rotate.py   | 107 +++++++++++++++++++++++++++++++++
 cv_analysis/utils/pdf2image.py |   1 -
 3 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 cv_analysis/server/rotate.py
diff --git a/cv_analysis/server/pipeline.py b/cv_analysis/server/pipeline.py
index e611914..44c346e 100644
--- a/cv_analysis/server/pipeline.py
+++ b/cv_analysis/server/pipeline.py
@@ -5,6 +5,7 @@ from funcy import lmap
 
 from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
 from cv_analysis.layout_parsing import parse_layout
+from cv_analysis.server.rotate import rotate_rectangle
 from cv_analysis.table_parsing import parse_tables
 from cv_analysis.utils.pdf2image import pdf_to_image_metadata_pairs
 from cv_analysis.utils.structures import Rectangle
@@ -18,6 +19,7 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
         Convert PDF to pairs of image and page information
         Analyse pages, get list of bounding boxes per page (e.g. table cells)
         Convert pixel values to inches
+        Rotate results if page is rotated
         Format results to stream of dictionaries with page information and analysis results
     """
 
@@ -30,6 +32,9 @@ def make_analysis_pipeline(analysis_fn: Callable, dpi=200):
     def image_metadata_pair_to_results(image_metadata_pair):
         rectangles = analysis_fn(image_metadata_pair.image)
         rectangles = map(partial(pixel_rect_to_inches_rect, dpi=dpi), rectangles)
+        if image_metadata_pair.metadata["rotation"] != 0:
+            rotate_rectangle_fn = partial(rotate_rectangle, metadata=image_metadata_pair.metadata)
+            rectangles = map(rotate_rectangle_fn, rectangles)
         bboxes = lmap(lambda x: x.json_xyxy(), rectangles)
         return {**image_metadata_pair.metadata, "bboxes": bboxes}
 
diff --git a/cv_analysis/server/rotate.py b/cv_analysis/server/rotate.py
new file mode 100644
index 0000000..ec9a867
--- /dev/null
+++ b/cv_analysis/server/rotate.py
@@ -0,0 +1,107 @@
+from _operator import itemgetter
+
+import numpy as np
+
+from cv_analysis.utils.structures import Rectangle
+
+
+def rotate_rectangle(rectangle, metadata):
+    """Rotate `rectangle` against the page rotation from `metadata` and return it as a normalised Rectangle."""
+    width, height, rotation = itemgetter("width", "height", "rotation")(metadata)
+    # Accept quarter-turn counts (0-3) as well as degrees; wrap 360 and negative angles into 0-3.
+    quarter_turns = rotation if rotation in (0, 1, 2, 3) else (rotation // 90) % 4
+    # For odd quarter-turns the page extents are swapped before they are used for the back shift.
+    size = (height, width) if quarter_turns in (1, 3) else (width, height)
+
+    x1, y1, x2, y2 = rectangle.xyxy()
+    corners = np.array([[x1, x2], [y1, y2]])  # one corner per column
+    rotated = rotate_and_shift(corners, quarter_turns, size)
+
+    # The rotation can change which corner is the lower left one, so re-sort each axis.
+    (x1, x2), (y1, y2) = sorted(rotated[0, :]), sorted(rotated[1, :])
+    return Rectangle.from_xyxy((x1, y1, x2, y2), discrete=False)
+
+
+def rotate_and_shift(matrix, rotation, size, debug=False):
+    """Rotates a matrix against (!) a specified rotation. That is, the rotation is applied negatively. The matrix is
+    also shifted to ensure it contains points (columns) in quadrant I.
+
+    Procedure:
+        1) Rotate the matrix clockwise according to rotation value
+        2) Shift the matrix back into quadrant I
+
+    The corner vectors may no longer be at the lower left and upper right corners after the rotation; the caller
+    has to re-sort the coordinates if it needs them in that order.
+
+    Args:
+        matrix: matrix to transform, one point per column (row 0 holds x values, row 1 holds y values)
+        rotation: any of 0, 1, 2, or 3, where 1 = 90 degree CLOCKWISE rotation etc.
+        size: the size of the page as a tuple (<width>, <height>)
+        debug: if true, plots the intermediate matrices with matplotlib
+
+    Returns:
+        The rotated and shifted matrix. Values may carry floating point noise since the rotation matrix is
+        built from np.cos / np.sin.
+
+    Raises:
+        ValueError: if rotation is not one of 0, 1, 2, or 3.
+    """
+
+    def shift_to_quadrant_1(matrix):
+        # Per-rotation (x, y) offset, added as a column vector so that it applies to every point (column).
+        # Rotation 2 needs x + width and y + height for every point; multiplying a 2x2 matrix of ones by `size`
+        # would broadcast `size` along the rows instead and shift the second point's x by the height.
+        shifts = {0: (0, 0), 1: (0, size[1]), 2: (size[0], size[1]), 3: (size[0], 0)}
+        if rotation not in shifts:
+            raise ValueError(f"Unexpected rotation value '{rotation}'. Expected any of 0, 1, 2, or 3.")
+        return matrix + np.reshape(shifts[rotation], (2, 1))
+
+    # PDF rotations are clockwise, hence subtract the radian value of the rotation from 2 pi
+    radians = (2 * np.pi) - (np.pi * (rotation / 2))
+    matrix_rotated = rotate(matrix, radians)
+    matrix_rotated_and_shifted = shift_to_quadrant_1(matrix_rotated)
+
+    if debug:
+        __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted)
+    return matrix_rotated_and_shifted
+
+
+def __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted):
+    """Debug helper: plot the page frame and the matrices before and after the transformation.
+
+    Left axis: original frame (red) and original matrix (default colour).
+    Right axis: rotated frame (red), rotated matrix (green), rotated and shifted matrix (blue).
+
+    Args:
+        size: the size of the page as a tuple (<width>, <height>)
+        radians: angle that is passed to `rotate` to obtain the rotated frame
+        matrix: the untransformed matrix, one vector per column
+        matrix_rotated: `matrix` after the rotation
+        matrix_rotated_and_shifted: `matrix_rotated` after the shift
+    """
+    # Imported lazily so that matplotlib is only needed when debugging.
+    import matplotlib.pyplot as plt
+
+    shrink = 0.005  # presumably chosen so that the arrows fit into the figure
+    quiver_kwargs = {"scale": 5, "scale_units": "inches"}
+    origin = ([0, 0], [0, 0])
+
+    # Scale out of place: leaves the caller's arrays untouched and also works for integer arrays, for which an
+    # in-place `*=` with a float factor raises a casting error.
+    frame = np.eye(2) * size * shrink
+    frame_rotated = rotate(np.eye(2) * size, radians) * shrink
+    m1, m2, m3 = (m * shrink for m in (matrix, matrix_rotated, matrix_rotated_and_shifted))
+
+    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
+    axes = axes.ravel()
+    for axis, vectors, color in ((0, frame, "red"), (1, frame_rotated, "red"), (1, m2, "green"), (1, m3, "blue")):
+        axes[axis].quiver(*origin, vectors[0, :], vectors[1, :], color=color, **quiver_kwargs)
+    axes[0].quiver(*origin, m1[0, :], m1[1, :], **quiver_kwargs)
+
+    plt.show()
+
+
+def rotate(input_matrix, radians):
+    """Rotate the column vectors of `input_matrix` counter-clockwise by `radians` about the origin."""
+    cos, sin = np.cos(radians), np.sin(radians)
+    return np.dot(np.array([[cos, -sin], [sin, cos]]), input_matrix)
diff --git a/cv_analysis/utils/pdf2image.py b/cv_analysis/utils/pdf2image.py
index 1be9a5c..a26b003 100644
--- a/cv_analysis/utils/pdf2image.py
+++ b/cv_analysis/utils/pdf2image.py
@@ -22,7 +22,6 @@ def pdf_to_image_metadata_pairs(pdf: bytes, index=None, dpi=200) -> Iterator[Ima
 
 def page_to_image_metadata_pair(page: fitz.Page, dpi):
     metadata = get_page_info(page)
-    page.set_rotation(0)
     pixmap = page.get_pixmap(dpi=dpi)
     array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)