From f37b6d7d8eb89a2907e9483c6900ae2c048114df Mon Sep 17 00:00:00 2001 From: Julius Unverfehrt Date: Thu, 7 Jul 2022 11:35:12 +0200 Subject: [PATCH] Pull request #13: Add pdf coord conversion Merge in RR/cv-analysis from add-pdf-coord-conversion to master Squashed commit of the following: commit f56b7b45feb78142b032ef0faae2ca8dd020e6c5 Author: Julius Unverfehrt Date: Thu Jul 7 11:26:46 2022 +0200 update pyinfra commit 9086ef0a2059688fb8dd5559cda831bbbd36362b Author: Julius Unverfehrt Date: Thu Jul 7 11:21:53 2022 +0200 update input metadata keys commit 55f147a5848e22ea62242ea883a0ce53ef1c04a5 Author: Julius Unverfehrt Date: Thu Jul 7 09:16:16 2022 +0200 update to new input metadata signature commit df4652fb027f734f2613e4adb7bc5b17edee62e9 Author: Julius Unverfehrt Date: Wed Jul 6 16:55:36 2022 +0200 refactor commit e52c674085a9c7411c55a2e0993aa34622284317 Author: Julius Unverfehrt Date: Wed Jul 6 16:15:21 2022 +0200 update build script, refactor commit 1f874aea591f25544aaa3f39a4e38fa50a24615e Author: Julius Unverfehrt Date: Tue Jul 5 17:01:15 2022 +0200 add rotation formatter commit b78a69741287a4cd38a90ace98f67e8f1b803737 Author: Julius Unverfehrt Date: Tue Jul 5 09:26:27 2022 +0200 refactor commit b3155b8e072530f99114f3ee9135e73afc8f85cb Author: Julius Unverfehrt Date: Fri Jul 1 15:06:45 2022 +0200 made assertion robust to floating point precision commit 4169102a6b5053500a3db2d789d265c2c77d56a4 Author: Julius Unverfehrt Date: Fri Jul 1 15:06:01 2022 +0200 improve banner commit dea74593d925c802489e5400297b48a9729038f0 Author: Julius Unverfehrt Date: Fri Jul 1 14:28:08 2022 +0200 introduce derotation logic for rectangles from rotated pdfs, introduce continuous option for coordinates in Rectangle class commit d07e1dc2731ea7ae9887cc02bb98155bf1565a0d Author: Julius Unverfehrt Date: Fri Jul 1 10:39:38 2022 +0200 introduce table parsing formatter to convert pixel values to inches commit 67ff6730dd7073a0fc9e9698904325dea9537c5b Author: Julius Unverfehrt Date: Fri Jul 1 
08:06:42 2022 +0200 fixed duplicate logging commit 6c025409415329028f697bb99986cd0912c7ed54 Author: Julius Unverfehrt Date: Thu Jun 30 17:10:32 2022 +0200 add pyinfra mock script --- Dockerfile | 2 +- cv_analysis/server/format.py | 111 ++++++++++++++++++ .../server/{pyinfra_compat.py => stream.py} | 24 ++-- cv_analysis/utils/banner.py | 21 ++-- cv_analysis/utils/logging.py | 3 - cv_analysis/utils/structures.py | 34 +++--- incl/pyinfra | 2 +- scripts/client_mock.py | 61 ---------- scripts/pyinfra_mock.py | 69 +++++++++++ src/serve.py | 2 +- test/fixtures/server.py | 38 +++--- ...at_test.py => formatted_stream_fn_test.py} | 2 +- 12 files changed, 247 insertions(+), 122 deletions(-) create mode 100644 cv_analysis/server/format.py rename cv_analysis/server/{pyinfra_compat.py => stream.py} (60%) delete mode 100644 scripts/client_mock.py create mode 100644 scripts/pyinfra_mock.py rename test/unit_tests/server/{pyinfra_compat_test.py => formatted_stream_fn_test.py} (85%) diff --git a/Dockerfile b/Dockerfile index 19f3b04..4fa9003 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,4 +17,4 @@ WORKDIR /app/service EXPOSE 5000 EXPOSE 8080 -CMD ["python3", "src/run_service.py"] \ No newline at end of file +CMD ["python3", "src/serve.py"] \ No newline at end of file diff --git a/cv_analysis/server/format.py b/cv_analysis/server/format.py new file mode 100644 index 0000000..6e00991 --- /dev/null +++ b/cv_analysis/server/format.py @@ -0,0 +1,111 @@ +from _operator import itemgetter +from functools import partial + +import numpy as np + +from cv_analysis.utils.structures import Rectangle + + +def make_formatter(dpi, page_size, rotation): + rotation = rotation // 90 if rotation not in [0, 1, 2, 3] else rotation + + def format_(key2pixel): + convert = partial(convert_pixel_to_inch, dpi=dpi) + x, y, w, h = map(convert, itemgetter("x", "y", "width", "height")(key2pixel)) + x1, y1 = x + w, y + h + matrix = np.vstack([[x, y], [x1, y1]]).T + new_matrix = rotate_and_shift(matrix, rotation, 
page_size) + x1, x2 = sorted(new_matrix[0, :]) + y1, y2 = sorted(new_matrix[1, :]) + return Rectangle.from_xyxy((x1, y1, x2, y2), discrete=False).json_xywh() + + return format_ + + +def convert_pixel_to_inch(pixel, dpi): + return pixel / dpi * 72 + + +def rotate(input_matrix, radians): + rotation_matrix = np.vstack([[np.cos(radians), -np.sin(radians)], [np.sin(radians), np.cos(radians)]]) + + return np.dot(rotation_matrix, input_matrix) + + +def rotate_and_shift(matrix, rotation, size, debug=False): + """Rotates a matrix against (!) a specified rotation. That is, the rotation is applied negatively. The matrix is + also shifted to ensure it contains points (columns) in quadrant I. + + Procedure: + 1) Rotate the matrix clockwise according to rotation value + 2) Shift the matrix back into quadrant I + 3) Set x_i and y_i to new lower left and upper right corners, since the corner vectors are no longer at these + corners due to the rotation + + Args: + matrix: matrix to transform + rotation: any of 0, 1, 2, or 3, where 1 = 90 degree CLOCKWISE rotation etc. + size: the size of the page as a tuple (, ) + debug: Visualizes the transformations for later re-understanding of the code + """ + + def shift_to_quadrant_1(matrix): + + # TODO: generalize + if rotation == 0: + back_shift = np.zeros_like(np.eye(2)) + elif rotation == 1: + back_shift = np.array([[0, 0], [1, 1]]) * size[1] + elif rotation == 2: + back_shift = np.array([[1, 1], [1, 1]]) * size + elif rotation == 3: + back_shift = np.array([[1, 1], [0, 0]]) * size[0] + else: + raise ValueError(f"Unexpected rotation value '{rotation}'. 
Expected any of 0, 1, 2, or 3.") + + matrix_shifted = matrix + back_shift + return matrix_shifted + + # PDF rotations are clockwise, hence subtract the radian value of the rotation from 2 pi + radians = (2 * np.pi) - (np.pi * (rotation / 2)) + matrix_rotated = rotate(matrix, radians) + matrix_rotated_and_shifted = shift_to_quadrant_1(matrix_rotated) + + if debug: + __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted) + return matrix_rotated_and_shifted + + +def __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted): + + import matplotlib.pyplot as plt + from copy import deepcopy + + m1 = matrix + m2 = matrix_rotated + m3 = matrix_rotated_and_shifted + + m1, m2, m3 = map(deepcopy, (m1, m2, m3)) + + frame = np.eye(2) * size + frame_rotated = rotate(frame, radians) + + f1 = frame + f2 = frame_rotated + + f1 *= 0.005 * 1 + f2 *= 0.005 * 1 + m1 *= 0.005 * 1 + m2 *= 0.005 * 1 + m3 *= 0.005 * 1 + + fig, axes = plt.subplots(1, 2, figsize=(8, 4)) + axes = axes.ravel() + + axes[0].quiver([0, 0], [0, 0], f1[0, :], f1[1, :], scale=5, scale_units="inches", color="red") + axes[1].quiver([0, 0], [0, 0], f2[0, :], f2[1, :], scale=5, scale_units="inches", color="red") + axes[0].quiver([0, 0], [0, 0], m1[0, :], m1[1, :], scale=5, scale_units="inches") + axes[1].quiver([0, 0], [0, 0], m2[0, :], m2[1, :], scale=5, scale_units="inches", color="green") + axes[1].quiver([0, 0], [0, 0], m3[0, :], m3[1, :], scale=5, scale_units="inches", color="blue") + + plt.show() diff --git a/cv_analysis/server/pyinfra_compat.py b/cv_analysis/server/stream.py similarity index 60% rename from cv_analysis/server/pyinfra_compat.py rename to cv_analysis/server/stream.py index e9e8b7b..3424d4f 100644 --- a/cv_analysis/server/pyinfra_compat.py +++ b/cv_analysis/server/stream.py @@ -1,9 +1,11 @@ import gzip +from operator import itemgetter from typing import Callable from funcy import lmap from pyinfra.server.utils import 
make_streamable_and_wrap_in_packing_logic +from cv_analysis.server.format import make_formatter from cv_analysis.utils.preprocessing import open_img_from_bytes @@ -19,18 +21,18 @@ def make_streamable_analysis_fn(analysis_fn: Callable): wrapped function """ - def analyse(data, metadata: dict): - def format_results(): - return { - **metadata, - "pageWidth": image.shape[1], - "pageHeight": image.shape[0], - "cells": results, - } + def analyse(data: bytes, metadata: dict): image = open_img_from_bytes(gzip.decompress(data)) - results = lmap(lambda x: x.json_xywh(), analysis_fn(image)) - results_metadata = format_results() if results else {} - return b"", results_metadata + + dpi = metadata["image_info"]["dpi"] + width, height, rotation = itemgetter("width", "height", "rotation")(metadata["page_info"]) + + formatter = make_formatter(dpi, (width, height), rotation) + + results = map(lambda x: x.json_xywh(), analysis_fn(image)) + results = {"cells": (lmap(formatter, results))} + + return b"", {**metadata, **results} return make_streamable_and_wrap_in_packing_logic(analyse, batched=False) diff --git a/cv_analysis/utils/banner.py b/cv_analysis/utils/banner.py index a483707..4ae6c8a 100644 --- a/cv_analysis/utils/banner.py +++ b/cv_analysis/utils/banner.py @@ -1,16 +1,13 @@ def make_art(): art = r""" - __ __ - | \ | \ - _______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______ - / \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \ -| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$ -| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \ -| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\ - \$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$ - \$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$ - | \__| $$ - \$$ $$ - \$$$$$$ + __ + _ |@@| + / \ \--/ __ .__ .__ + ) O|----| | __ ___ __ _____ ____ _____ | | ___.__. 
_____|__| ______ + / / \ }{ /\ )_ / _\\ \/ / ______ \__ \ / \\__ \ | | | | |/ ___/ |/ ___/ + )/ /\__/\ \__O (__ \ / /_____/ / __ \| | \/ __ \| |_\___ |\___ \| |\___ \ +|/ (--/\--) \__/ \_/ (______/___|__(______/____/\____/_____/|__/_____/ +/ _)( )(_ + `---''---` """ return art diff --git a/cv_analysis/utils/logging.py b/cv_analysis/utils/logging.py index 792a3fa..ad7862e 100644 --- a/cv_analysis/utils/logging.py +++ b/cv_analysis/utils/logging.py @@ -17,6 +17,3 @@ def get_logger(): logger.addHandler(ch) logger.propagate = False return logger - - -logger = get_logger() diff --git a/cv_analysis/utils/structures.py b/cv_analysis/utils/structures.py index f6c3c6e..adec723 100644 --- a/cv_analysis/utils/structures.py +++ b/cv_analysis/utils/structures.py @@ -1,21 +1,25 @@ from json import dumps +import numpy as np +from funcy import identity + class Rectangle: - def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh"): + def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh", discrete=True): + make_discrete = int if discrete else identity try: - self.x1 = int(x1) - self.y1 = int(y1) - self.w = int(w) if w else int(x2 - x1) - self.h = int(h) if h else int(y2 - y1) - self.x2 = int(x2) if x2 else self.x1 + self.w - self.y2 = int(y2) if y2 else self.y1 + self.h - assert (self.x1 + self.w) == self.x2 - assert (self.y1 + self.h) == self.y2 + self.x1 = make_discrete(x1) + self.y1 = make_discrete(y1) + self.w = make_discrete(w) if w else make_discrete(x2 - x1) + self.h = make_discrete(h) if h else make_discrete(y2 - y1) + self.x2 = make_discrete(x2) if x2 else self.x1 + self.w + self.y2 = make_discrete(y2) if y2 else self.y1 + self.h + assert np.isclose(self.x1 + self.w, self.x2) + assert np.isclose(self.y1 + self.h, self.y2) self.indent = indent self.format = format - except: - raise Exception("x1, y1, (w|x2), and (h|y2) must be defined.") + except Exception as err: + raise Exception("x1, y1, 
(w|x2), and (h|y2) must be defined.") from err def json_xywh(self): return {"x": self.x1, "y": self.y1, "width": self.w, "height": self.h} @@ -37,14 +41,14 @@ class Rectangle: return self.x1, self.y1, self.w, self.h @classmethod - def from_xyxy(cls, xyxy_tuple): + def from_xyxy(cls, xyxy_tuple, discrete=True): x1, y1, x2, y2 = xyxy_tuple - return cls(x1=x1, y1=y1, x2=x2, y2=y2) + return cls(x1=x1, y1=y1, x2=x2, y2=y2, discrete=discrete) @classmethod - def from_xywh(cls, xywh_tuple): + def from_xywh(cls, xywh_tuple, discrete=True): x, y, w, h = xywh_tuple - return cls(x1=x, y1=y, w=w, h=h) + return cls(x1=x, y1=y, w=w, h=h, discrete=discrete) def __str__(self): return dumps(self.json(), indent=self.indent) diff --git a/incl/pyinfra b/incl/pyinfra index fb0b64f..1e70d49 160000 --- a/incl/pyinfra +++ b/incl/pyinfra @@ -1 +1 @@ -Subproject commit fb0b64f8d55933e9651fde5eec0175e7d317655d +Subproject commit 1e70d49531e89613c70903be49290b94ee014f65 diff --git a/scripts/client_mock.py b/scripts/client_mock.py deleted file mode 100644 index a64fe95..0000000 --- a/scripts/client_mock.py +++ /dev/null @@ -1,61 +0,0 @@ -# python client_mock.py --pdf_path=/home/iriley/Documents/pdfs/unscanned/06.pdf --operations=table-parsing -import argparse -import json -from multiprocessing.sharedctypes import Value -import requests - -from cv_analysis.utils.preprocessing import open_pdf - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--pdf_path", required=True, help="path to PDF file") - parser.add_argument( - "--first_page", type=int, required=False, default=0, help="page number from which to start (starts at 0)" - ) - parser.add_argument( - "--last_page", - type=int, - required=False, - default=None, - help="page number at which to stop (non-inclusive); specify None to go to the end", - ) - parser.add_argument( - "--operations", - type=str, - required=False, - help="Comma-separated list of operations, any of the following: 
\ntable-parsing\nredaction-detection\ - \nfigure-detection\nlayout-detection", - default="table-parsing", - ) - args = parser.parse_args() - - return args - - -def main(args): - - operations = args.operations.split(",") - for operation in operations: - print("****************************") - print(f"{' '+operation+' ':^27}") - print("****************************") - if operation == "table-parsing": - response = requests.post("http://127.0.0.1:5000/tables", data=open(args.pdf_path, "rb")) - elif operation == "redaction-detection": - response = requests.post("http://127.0.0.1:5000/redactions", data=open(args.pdf_path, "rb")) - elif operation == "figure-detection": - response = requests.post("http://127.0.0.1:5000/figures", data=open(args.pdf_path, "rb")) - elif operation == "layout-parsing": - response = requests.post("http://127.0.0.1:5000/layout", data=open(args.pdf_path, "rb")) - else: - raise ValueError(f"{args.operation} is not a valid value.") - response.raise_for_status() - predictions = response.json() - - print(json.dumps(predictions, indent=2)) - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/scripts/pyinfra_mock.py b/scripts/pyinfra_mock.py new file mode 100644 index 0000000..1717521 --- /dev/null +++ b/scripts/pyinfra_mock.py @@ -0,0 +1,69 @@ +import argparse +import base64 +import gzip +import io +import json +from operator import itemgetter +from typing import List + +import fitz +import pdf2image +from PIL import Image +from funcy import lmap, compose, pluck +from funcy import lpluck + +from pyinfra.default_objects import get_component_factory + +from cv_analysis.config import CONFIG +from incl.pyinfra.test.utils.image import image_to_bytes + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--pdf_path", "-p", required=True) + parser.add_argument("--operation", "-o", choices=["figure_detection", "table_parsing"], required=True) + parser.add_argument("--result_path", "-r", required=True) + 
args = parser.parse_args() + + return args + + +def request_metadatas(dpi, n_metadata): + return [{"dpi": dpi} for _ in range(1, n_metadata)] + + +def draw_cells_on_page(cells: List[dict], page): + def format_xywh_to_x0y0x1y1(rect): + x, y, w, h = rect + return x, y, x + w, y + h + + rects = map(itemgetter("x", "y", "width", "height"), cells) + rects = map(format_xywh_to_x0y0x1y1, rects) + + for rect in rects: + page.draw_rect(rect, color=(0.3, 0.7, 0.1), width=2, overlay=True) + + +def annotate_results_on_pdf(results, pdf_path, result_path): + open_pdf = fitz.open(pdf_path) + metadata_per_page = pluck("metadata", results) + + for page, metadata in zip(open_pdf, metadata_per_page): + if metadata: + draw_cells_on_page(metadata["cells"], page) + open_pdf.save(result_path) + + +def main(args): + dpi = 200 + images = lmap(compose(gzip.compress, image_to_bytes), pdf2image.convert_from_path(args.pdf_path, dpi=dpi)) + + submit_endpoint = f"http://{CONFIG.webserver.host}:{CONFIG.webserver.port}/{args.operation}" + pipeline = get_component_factory(CONFIG).get_pipeline(submit_endpoint) + results = list(pipeline(data=images, metadata=request_metadatas(dpi, len(images)))) + + annotate_results_on_pdf(results, args.pdf_path, args.result_path) + + +if __name__ == "__main__": + main(parse_args()) diff --git a/src/serve.py b/src/serve.py index a5481e9..52513f7 100644 --- a/src/serve.py +++ b/src/serve.py @@ -4,7 +4,7 @@ from waitress import serve from cv_analysis.config import CONFIG from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline -from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn +from cv_analysis.server.stream import make_streamable_analysis_fn from cv_analysis.table_parsing import parse_tables from cv_analysis.utils.banner import make_art from cv_analysis.utils.logging import get_logger diff --git a/test/fixtures/server.py b/test/fixtures/server.py index 86aa1c1..4c959e7 100644 --- 
a/test/fixtures/server.py +++ b/test/fixtures/server.py @@ -6,9 +6,8 @@ import pytest from PIL import Image from funcy import first -from cv_analysis.utils.preprocessing import open_img_from_bytes from cv_analysis.utils.structures import Rectangle -from incl.pyinfra.pyinfra.server.packing import bytes_to_string, string_to_bytes +from incl.pyinfra.pyinfra.server.packing import bytes_to_string @pytest.fixture @@ -22,26 +21,33 @@ def random_image_as_bytes_and_compressed(random_image): @pytest.fixture def random_image_metadata_package(random_image_as_bytes_and_compressed): data = bytes_to_string(random_image_as_bytes_and_compressed) - return [{"data": data, "metadata": {"key": "value", "key2": "value2"}}] + return [ + { + "data": data, + "metadata": { + "page_info": {"width": 1000, "height": 2000, "rotation": 90}, + "image_info": {"dpi": 200}, + }, + } + ] @pytest.fixture -def expected_analyse_metadata(operation, random_image_metadata_package): +def expected_analyse_metadata(operation, random_image_metadata_package, image_size): metadata = first(random_image_metadata_package) - image = open_img_from_bytes(gzip.decompress(string_to_bytes(metadata["data"]))) - wrapped_metadata = { - **metadata["metadata"], - "pageWidth": image.shape[1], - "pageHeight": image.shape[0], - } + metadata = metadata["metadata"] + + if image_size == (200, 200): + result_metadata = {"cells": [{"height": 72.0, "width": 71.99999999999999, "x": 0.0, "y": 1928.0}]} + elif image_size == (500, 500): + result_metadata = {"cells": [{"height": 180.0, "width": 179.99999999999997, "x": 0.0, "y": 1820.0}]} + elif image_size == (800, 800): + result_metadata = {"cells": [{"height": 288.0, "width": 287.99999999999994, "x": 0.0, "y": 1712.0}]} + else: + result_metadata = {} if operation == "mock": - return { - **wrapped_metadata, - "cells": [{"x": 0, "y": 0, "width": image.shape[1], "height": image.shape[0]}], - } - if operation == "table_parsing": - return {} + return {**metadata, **result_metadata} 
@pytest.fixture diff --git a/test/unit_tests/server/pyinfra_compat_test.py b/test/unit_tests/server/formatted_stream_fn_test.py similarity index 85% rename from test/unit_tests/server/pyinfra_compat_test.py rename to test/unit_tests/server/formatted_stream_fn_test.py index d2f0690..88526fe 100644 --- a/test/unit_tests/server/pyinfra_compat_test.py +++ b/test/unit_tests/server/formatted_stream_fn_test.py @@ -1,7 +1,7 @@ import pytest from funcy import first -from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn +from cv_analysis.server.stream import make_streamable_analysis_fn @pytest.mark.parametrize("operation", ["mock"])