Pull request #13: Add pdf coord conversion

Merge in RR/cv-analysis from add-pdf-coord-conversion to master

Squashed commit of the following:

commit f56b7b45feb78142b032ef0faae2ca8dd020e6c5
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Thu Jul 7 11:26:46 2022 +0200

    update pyinfra

commit 9086ef0a2059688fb8dd5559cda831bbbd36362b
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Thu Jul 7 11:21:53 2022 +0200

    update input metadata keys

commit 55f147a5848e22ea62242ea883a0ce53ef1c04a5
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Thu Jul 7 09:16:16 2022 +0200

    update to new input metadata signature

commit df4652fb027f734f2613e4adb7bc5b17edee62e9
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Wed Jul 6 16:55:36 2022 +0200

    refactor

commit e52c674085a9c7411c55a2e0993aa34622284317
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Wed Jul 6 16:15:21 2022 +0200

    update build script, refactor

commit 1f874aea591f25544aaa3f39a4e38fa50a24615e
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Jul 5 17:01:15 2022 +0200

    add rotation formatter

commit b78a69741287a4cd38a90ace98f67e8f1b803737
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Tue Jul 5 09:26:27 2022 +0200

    refactor

commit b3155b8e072530f99114f3ee9135e73afc8f85cb
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 1 15:06:45 2022 +0200

    made assertion robust to floating point precision

commit 4169102a6b5053500a3db2d789d265c2c77d56a4
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 1 15:06:01 2022 +0200

    improve banner

commit dea74593d925c802489e5400297b48a9729038f0
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 1 14:28:08 2022 +0200

    introduce derotation logic for rectangles from rotated pdfs, introduce continuous option for coordinates in Rectangle class

commit d07e1dc2731ea7ae9887cc02bb98155bf1565a0d
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 1 10:39:38 2022 +0200

    introduce table parsing formatter to convert pixel values to inches

commit 67ff6730dd7073a0fc9e9698904325dea9537c5b
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Fri Jul 1 08:06:42 2022 +0200

    fixed duplicate logging

commit 6c025409415329028f697bb99986cd0912c7ed54
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date:   Thu Jun 30 17:10:32 2022 +0200

    add pyinfra mock script
This commit is contained in:
Julius Unverfehrt 2022-07-07 11:35:12 +02:00
parent fc8a9e15f8
commit f37b6d7d8e
12 changed files with 247 additions and 122 deletions

View File

@ -17,4 +17,4 @@ WORKDIR /app/service
EXPOSE 5000
EXPOSE 8080
CMD ["python3", "src/run_service.py"]
CMD ["python3", "src/serve.py"]

View File

@ -0,0 +1,111 @@
from _operator import itemgetter
from functools import partial
import numpy as np
from cv_analysis.utils.structures import Rectangle
def make_formatter(dpi, page_size, rotation):
    """Build a function that converts a pixel-space cell into derotated page coordinates.

    Args:
        dpi: resolution of the analysed image; pixel values are scaled by 72 / dpi.
        page_size: the size of the page as a tuple (<width>, <height>), forwarded to `rotate_and_shift`.
        rotation: page rotation, given either as a quadrant index (0, 1, 2 or 3) or in degrees
            (90, 180, 270, ...). Degree values are floor-divided by 90 and wrapped modulo 4, so that
            360 is treated like 0 and -90 like 270 instead of failing later with a ValueError.

    Returns:
        A function that takes a mapping with the keys "x", "y", "width" and "height" and returns the
        result of `Rectangle.json_xywh()` for the scaled and derotated rectangle.
    """
    if rotation not in [0, 1, 2, 3]:
        # Wrap full turns and negative angles into the four quadrants rotate_and_shift accepts.
        rotation = (rotation // 90) % 4

    convert = partial(convert_pixel_to_inch, dpi=dpi)

    def format_(key2pixel):
        x, y, w, h = map(convert, itemgetter("x", "y", "width", "height")(key2pixel))
        # Columns of the matrix are the two opposite corners (x, y) and (x + w, y + h).
        corners = np.vstack([[x, y], [x + w, y + h]]).T
        derotated = rotate_and_shift(corners, rotation, page_size)
        # The rotation may swap which corner is the lower/upper one, so re-sort per axis.
        x1, x2 = sorted(derotated[0, :])
        y1, y2 = sorted(derotated[1, :])
        return Rectangle.from_xyxy((x1, y1, x2, y2), discrete=False).json_xywh()

    return format_
def convert_pixel_to_inch(pixel, dpi):
    """Scale a pixel measurement by 72 / dpi.

    Dividing by `dpi` gives inches; the factor 72 then expresses the value in units of 1/72 inch
    (presumably PDF points, despite the function name -- confirm against callers).
    """
    inches = pixel / dpi
    return inches * 72
def rotate(input_matrix, radians):
    """Left-multiply `input_matrix` with the 2D rotation matrix for the angle `radians`.

    The rotation matrix is [[cos, -sin], [sin, cos]], so each column of `input_matrix` is treated as
    an (x, y) vector.
    """
    cosine, sine = np.cos(radians), np.sin(radians)
    rotation_matrix = np.vstack([[cosine, -sine], [sine, cosine]])
    return np.dot(rotation_matrix, input_matrix)
def rotate_and_shift(matrix, rotation, size, debug=False):
    """Rotates a matrix against (!) a specified rotation. That is, the rotation is applied negatively. The matrix is
    also shifted to ensure it contains points (columns) in quadrant I.

    Procedure:
        1) Rotate the matrix clockwise according to rotation value
        2) Shift the matrix back into quadrant I

    The caller is responsible for re-deriving the lower left and upper right corners afterwards, since the corner
    vectors are no longer at these corners due to the rotation.

    Args:
        matrix: matrix to transform; rows are the x and y coordinates, columns are the points
        rotation: any of 0, 1, 2, or 3, where 1 = 90 degree CLOCKWISE rotation etc.
        size: the size of the page as a tuple (<width>, <height>)
        debug: Visualizes the transformations for later re-understanding of the code

    Returns:
        The rotated and shifted matrix, with the same layout as `matrix`.

    Raises:
        ValueError: if `rotation` is not one of 0, 1, 2, or 3.
    """

    def shift_to_quadrant_1(rotated):
        # The shift is a column vector (dx, dy) so that it broadcasts over every point (column):
        # the x row is moved by dx and the y row by dy. Multiplying a 2x2 matrix of ones with the
        # `size` tuple would instead broadcast along the columns and shift the first point by the
        # width and the second point by the height on both axes.
        if rotation == 0:
            back_shift = np.zeros((2, 1))
        elif rotation == 1:
            back_shift = np.array([[0], [size[1]]])
        elif rotation == 2:
            back_shift = np.array([[size[0]], [size[1]]])
        elif rotation == 3:
            back_shift = np.array([[size[0]], [0]])
        else:
            raise ValueError(f"Unexpected rotation value '{rotation}'. Expected any of 0, 1, 2, or 3.")
        return rotated + back_shift

    # PDF rotations are clockwise, hence subtract the radian value of the rotation from 2 pi
    radians = (2 * np.pi) - (np.pi * (rotation / 2))
    matrix_rotated = rotate(matrix, radians)
    matrix_rotated_and_shifted = shift_to_quadrant_1(matrix_rotated)

    if debug:
        __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted)

    return matrix_rotated_and_shifted
def __show_matrices(size, radians, matrix, matrix_rotated, matrix_rotated_and_shifted):
    """Debug visualisation of the derotation steps as arrow plots.

    Left plot: the page frame (red) and the input matrix (default colour).
    Right plot: the rotated page frame (red), the rotated matrix (green) and the rotated and shifted
    matrix (blue).

    The passed matrices are copied before scaling, so the caller's arrays are left untouched.
    """
    import matplotlib.pyplot as plt
    from copy import deepcopy

    # Factor used to shrink all vectors so that they fit the fixed quiver scale below.
    shrink = 0.005 * 1

    original, rotated, shifted = (
        deepcopy(candidate) for candidate in (matrix, matrix_rotated, matrix_rotated_and_shifted)
    )

    page_frame = np.eye(2) * size
    page_frame_rotated = rotate(page_frame, radians)

    for vectors in (page_frame, page_frame_rotated, original, rotated, shifted):
        vectors *= shrink

    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    before, after = axes.ravel()

    # (target axes, vectors to draw, extra keyword arguments) in drawing order.
    drawings = [
        (before, page_frame, {"color": "red"}),
        (after, page_frame_rotated, {"color": "red"}),
        (before, original, {}),
        (after, rotated, {"color": "green"}),
        (after, shifted, {"color": "blue"}),
    ]
    for target, vectors, extra in drawings:
        target.quiver([0, 0], [0, 0], vectors[0, :], vectors[1, :], scale=5, scale_units="inches", **extra)

    plt.show()

View File

@ -1,9 +1,11 @@
import gzip
from operator import itemgetter
from typing import Callable
from funcy import lmap
from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic
from cv_analysis.server.format import make_formatter
from cv_analysis.utils.preprocessing import open_img_from_bytes
@ -19,18 +21,18 @@ def make_streamable_analysis_fn(analysis_fn: Callable):
wrapped function
"""
def analyse(data, metadata: dict):
def format_results():
return {
**metadata,
"pageWidth": image.shape[1],
"pageHeight": image.shape[0],
"cells": results,
}
def analyse(data: bytes, metadata: dict):
image = open_img_from_bytes(gzip.decompress(data))
results = lmap(lambda x: x.json_xywh(), analysis_fn(image))
results_metadata = format_results() if results else {}
return b"", results_metadata
dpi = metadata["image_info"]["dpi"]
width, height, rotation = itemgetter("width", "height", "rotation")(metadata["page_info"])
formatter = make_formatter(dpi, (width, height), rotation)
results = map(lambda x: x.json_xywh(), analysis_fn(image))
results = {"cells": (lmap(formatter, results))}
return b"", {**metadata, **results}
return make_streamable_and_wrap_in_packing_logic(analyse, batched=False)

View File

@ -1,16 +1,13 @@
def make_art():
art = r"""
__ __
| \ | \
_______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______
/ \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \
| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$
| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \
| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\
\$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$
\$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$
| \__| $$
\$$ $$
\$$$$$$
__
_ |@@|
/ \ \--/ __ .__ .__
) O|----| | __ ___ __ _____ ____ _____ | | ___.__. _____|__| ______
/ / \ }{ /\ )_ / _\\ \/ / ______ \__ \ / \\__ \ | | | | |/ ___/ |/ ___/
)/ /\__/\ \__O (__ \ / /_____/ / __ \| | \/ __ \| |_\___ |\___ \| |\___ \
|/ (--/\--) \__/ \_/ (______/___|__(______/____/\____/_____/|__/_____/
/ _)( )(_
`---''---`
"""
return art

View File

@ -17,6 +17,3 @@ def get_logger():
logger.addHandler(ch)
logger.propagate = False
return logger
logger = get_logger()

View File

@ -1,21 +1,25 @@
from json import dumps
import numpy as np
from funcy import identity
class Rectangle:
def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh"):
def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh", discrete=True):
make_discrete = int if discrete else identity
try:
self.x1 = int(x1)
self.y1 = int(y1)
self.w = int(w) if w else int(x2 - x1)
self.h = int(h) if h else int(y2 - y1)
self.x2 = int(x2) if x2 else self.x1 + self.w
self.y2 = int(y2) if y2 else self.y1 + self.h
assert (self.x1 + self.w) == self.x2
assert (self.y1 + self.h) == self.y2
self.x1 = make_discrete(x1)
self.y1 = make_discrete(y1)
self.w = make_discrete(w) if w else make_discrete(x2 - x1)
self.h = make_discrete(h) if h else make_discrete(y2 - y1)
self.x2 = make_discrete(x2) if x2 else self.x1 + self.w
self.y2 = make_discrete(y2) if y2 else self.y1 + self.h
assert np.isclose(self.x1 + self.w, self.x2)
assert np.isclose(self.y1 + self.h, self.y2)
self.indent = indent
self.format = format
except:
raise Exception("x1, y1, (w|x2), and (h|y2) must be defined.")
except Exception as err:
raise Exception("x1, y1, (w|x2), and (h|y2) must be defined.") from err
def json_xywh(self):
return {"x": self.x1, "y": self.y1, "width": self.w, "height": self.h}
@ -37,14 +41,14 @@ class Rectangle:
return self.x1, self.y1, self.w, self.h
@classmethod
def from_xyxy(cls, xyxy_tuple):
def from_xyxy(cls, xyxy_tuple, discrete=True):
x1, y1, x2, y2 = xyxy_tuple
return cls(x1=x1, y1=y1, x2=x2, y2=y2)
return cls(x1=x1, y1=y1, x2=x2, y2=y2, discrete=discrete)
@classmethod
def from_xywh(cls, xywh_tuple):
def from_xywh(cls, xywh_tuple, discrete=True):
x, y, w, h = xywh_tuple
return cls(x1=x, y1=y, w=w, h=h)
return cls(x1=x, y1=y, w=w, h=h, discrete=discrete)
def __str__(self):
return dumps(self.json(), indent=self.indent)

@ -1 +1 @@
Subproject commit fb0b64f8d55933e9651fde5eec0175e7d317655d
Subproject commit 1e70d49531e89613c70903be49290b94ee014f65

View File

@ -1,61 +0,0 @@
# python client_mock.py --pdf_path=/home/iriley/Documents/pdfs/unscanned/06.pdf --operations=table-parsing
import argparse
import json
from multiprocessing.sharedctypes import Value
import requests
from cv_analysis.utils.preprocessing import open_pdf
def parse_args():
    """Parse the command line options of the client mock.

    Options: --pdf_path (required), --first_page (int, default 0), --last_page (int, default None)
    and --operations (comma-separated string, default "table-parsing").
    """
    operations_help = (
        "Comma-separated list of operations, any of the following: \n"
        "table-parsing\nredaction-detection"
        "\nfigure-detection\nlayout-detection"
    )

    cli = argparse.ArgumentParser()
    cli.add_argument("--pdf_path", help="path to PDF file", required=True)
    cli.add_argument(
        "--first_page", default=0, required=False, type=int, help="page number from which to start (starts at 0)"
    )
    cli.add_argument(
        "--last_page",
        default=None,
        required=False,
        type=int,
        help="page number at which to stop (non-inclusive); specify None to go to the end",
    )
    cli.add_argument(
        "--operations",
        default="table-parsing",
        required=False,
        type=str,
        help=operations_help,
    )
    return cli.parse_args()
def main(args):
    """Post the PDF to the local service once per requested operation and print the JSON responses.

    Args:
        args: namespace with `operations` (comma-separated operation names) and `pdf_path`.

    Raises:
        ValueError: if an operation name is not known.
        requests.HTTPError: if the service answers with an error status (via `raise_for_status`).
    """
    endpoints = {
        "table-parsing": "http://127.0.0.1:5000/tables",
        "redaction-detection": "http://127.0.0.1:5000/redactions",
        "figure-detection": "http://127.0.0.1:5000/figures",
        "layout-parsing": "http://127.0.0.1:5000/layout",
        # The --operations help text advertises this spelling, so accept it as an alias.
        "layout-detection": "http://127.0.0.1:5000/layout",
    }

    for operation in args.operations.split(","):
        print("****************************")
        print(f"{' '+operation+' ':^27}")
        print("****************************")

        if operation not in endpoints:
            # Report the offending operation itself; the namespace has no `operation` attribute.
            raise ValueError(f"{operation} is not a valid value.")

        # Close the file handle as soon as the upload is done.
        with open(args.pdf_path, "rb") as pdf_file:
            response = requests.post(endpoints[operation], data=pdf_file)

        response.raise_for_status()
        predictions = response.json()
        print(json.dumps(predictions, indent=2))
# Script entry point: parse the command line and run the requested operations.
if __name__ == "__main__":
    args = parse_args()
    main(args)

69
scripts/pyinfra_mock.py Normal file
View File

@ -0,0 +1,69 @@
import argparse
import base64
import gzip
import io
import json
from operator import itemgetter
from typing import List
import fitz
import pdf2image
from PIL import Image
from funcy import lmap, compose, pluck
from funcy import lpluck
from pyinfra.default_objects import get_component_factory
from cv_analysis.config import CONFIG
from incl.pyinfra.test.utils.image import image_to_bytes
def parse_args():
    """Parse the command line options of the pyinfra mock script.

    All three options are required: --pdf_path/-p, --operation/-o (one of "figure_detection" or
    "table_parsing") and --result_path/-r.
    """
    option_specs = (
        (("--pdf_path", "-p"), {}),
        (("--operation", "-o"), {"choices": ["figure_detection", "table_parsing"]}),
        (("--result_path", "-r"), {}),
    )

    cli = argparse.ArgumentParser()
    for flags, extra in option_specs:
        cli.add_argument(*flags, required=True, **extra)

    return cli.parse_args()
def request_metadatas(dpi, n_metadata):
    """Create one metadata dict per item to submit.

    Args:
        dpi: resolution stored under the "dpi" key of every dict.
        n_metadata: number of metadata dicts to create.

    Returns:
        A list of `n_metadata` independent dicts of the form {"dpi": dpi}.
    """
    # range(n_metadata), not range(1, n_metadata): the latter yields one dict too few, leaving the
    # last item (or the only item of a single-page document) without metadata.
    return [{"dpi": dpi} for _ in range(n_metadata)]
def draw_cells_on_page(cells: List[dict], page):
    """Draw one rectangle per cell onto `page`.

    Each cell is a dict with the keys "x", "y", "width" and "height"; it is converted to the corner
    form (x0, y0, x1, y1) and handed to `page.draw_rect`.
    """
    read_xywh = itemgetter("x", "y", "width", "height")

    for cell in cells:
        x, y, w, h = read_xywh(cell)
        corners = (x, y, x + w, y + h)
        page.draw_rect(corners, color=(0.3, 0.7, 0.1), width=2, overlay=True)
def annotate_results_on_pdf(results, pdf_path, result_path):
    """Draw the cells of every result onto the matching PDF page and save the annotated copy.

    Args:
        results: iterable of mappings with a "metadata" entry, paired with the pages in order;
            a non-empty metadata is expected to carry a "cells" list.
        pdf_path: path of the PDF to annotate.
        result_path: path the annotated PDF is written to.
    """
    # Use the document as a context manager so it is closed even if drawing or saving fails.
    with fitz.open(pdf_path) as document:
        for page, metadata in zip(document, pluck("metadata", results)):
            # Pages without results come back with empty metadata and are left untouched.
            if metadata:
                draw_cells_on_page(metadata["cells"], page)
        document.save(result_path)
def main(args):
    """Render the PDF to images, send them through the pipeline and annotate the PDF with the results.

    Every page is rendered at 200 dpi, converted to bytes and gzip-compressed before it is submitted
    to the endpoint of the requested operation.
    """
    dpi = 200

    pages = pdf2image.convert_from_path(args.pdf_path, dpi=dpi)
    images = [gzip.compress(image_to_bytes(page)) for page in pages]

    host, port = CONFIG.webserver.host, CONFIG.webserver.port
    submit_endpoint = f"http://{host}:{port}/{args.operation}"

    pipeline = get_component_factory(CONFIG).get_pipeline(submit_endpoint)
    metadata = request_metadatas(dpi, len(images))
    results = list(pipeline(data=images, metadata=metadata))

    annotate_results_on_pdf(results, args.pdf_path, args.result_path)
# Script entry point: parse the command line and run the mock pipeline.
if __name__ == "__main__":
    main(parse_args())

View File

@ -4,7 +4,7 @@ from waitress import serve
from cv_analysis.config import CONFIG
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn
from cv_analysis.server.stream import make_streamable_analysis_fn
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.banner import make_art
from cv_analysis.utils.logging import get_logger

View File

@ -6,9 +6,8 @@ import pytest
from PIL import Image
from funcy import first
from cv_analysis.utils.preprocessing import open_img_from_bytes
from cv_analysis.utils.structures import Rectangle
from incl.pyinfra.pyinfra.server.packing import bytes_to_string, string_to_bytes
from incl.pyinfra.pyinfra.server.packing import bytes_to_string
@pytest.fixture
@ -22,26 +21,33 @@ def random_image_as_bytes_and_compressed(random_image):
@pytest.fixture
def random_image_metadata_package(random_image_as_bytes_and_compressed):
data = bytes_to_string(random_image_as_bytes_and_compressed)
return [{"data": data, "metadata": {"key": "value", "key2": "value2"}}]
return [
{
"data": data,
"metadata": {
"page_info": {"width": 1000, "height": 2000, "rotation": 90},
"image_info": {"dpi": 200},
},
}
]
@pytest.fixture
def expected_analyse_metadata(operation, random_image_metadata_package):
def expected_analyse_metadata(operation, random_image_metadata_package, image_size):
metadata = first(random_image_metadata_package)
image = open_img_from_bytes(gzip.decompress(string_to_bytes(metadata["data"])))
wrapped_metadata = {
**metadata["metadata"],
"pageWidth": image.shape[1],
"pageHeight": image.shape[0],
}
metadata = metadata["metadata"]
if image_size == (200, 200):
result_metadata = {"cells": [{"height": 72.0, "width": 71.99999999999999, "x": 0.0, "y": 1928.0}]}
elif image_size == (500, 500):
result_metadata = {"cells": [{"height": 180.0, "width": 179.99999999999997, "x": 0.0, "y": 1820.0}]}
elif image_size == (800, 800):
result_metadata = {"cells": [{"height": 288.0, "width": 287.99999999999994, "x": 0.0, "y": 1712.0}]}
else:
result_metadata = {}
if operation == "mock":
return {
**wrapped_metadata,
"cells": [{"x": 0, "y": 0, "width": image.shape[1], "height": image.shape[0]}],
}
if operation == "table_parsing":
return {}
return {**metadata, **result_metadata}
@pytest.fixture

View File

@ -1,7 +1,7 @@
import pytest
from funcy import first
from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn
from cv_analysis.server.stream import make_streamable_analysis_fn
@pytest.mark.parametrize("operation", ["mock"])