From cd5457840b6bd378112e48c454a2a3a46f645a60 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Wed, 4 Jan 2023 18:13:51 +0100 Subject: [PATCH] Refactoring Various --- .../figure_detection/figure_detection.py | 22 ++++++------ cv_analysis/table_parsing.py | 34 +++++++------------ cv_analysis/utils/conversion.py | 6 ++++ cv_analysis/utils/rectangle.py | 34 ++++++------------- cv_analysis/utils/utils.py | 2 +- .../figure_detection/figure_detection_test.py | 19 +++++------ test/unit_tests/table_parsing_test.py | 6 ++-- 7 files changed, 53 insertions(+), 70 deletions(-) create mode 100644 cv_analysis/utils/conversion.py diff --git a/cv_analysis/figure_detection/figure_detection.py b/cv_analysis/figure_detection/figure_detection.py index 3d15c38..147edce 100644 --- a/cv_analysis/figure_detection/figure_detection.py +++ b/cv_analysis/figure_detection/figure_detection.py @@ -5,13 +5,13 @@ import numpy as np from cv_analysis.figure_detection.figures import detect_large_coherent_structures from cv_analysis.figure_detection.text import remove_primary_text_regions +from cv_analysis.utils.conversion import box_to_rectangle from cv_analysis.utils.filters import ( is_large_enough, has_acceptable_format, is_not_too_large, ) from cv_analysis.utils.postprocessing import remove_included -from cv_analysis.utils.rectangle import Rectangle def detect_figures(image: np.array): @@ -21,19 +21,19 @@ def detect_figures(image: np.array): figure_filter = partial(is_likely_figure, min_area, max_area, max_width_to_height_ratio) image = remove_primary_text_regions(image) - cnts = detect_large_coherent_structures(image) - cnts = filter(figure_filter, cnts) + contours = detect_large_coherent_structures(image) + contours = filter(figure_filter, contours) - rects = map(cv2.boundingRect, cnts) - rects = map(Rectangle.from_xywh, rects) - rects = remove_included(rects) + boxes = map(cv2.boundingRect, contours) + rectangles = map(box_to_rectangle, boxes) + rectangles = remove_included(rectangles) - return rects + return rectangles -def is_likely_figure(min_area, max_area, max_width_to_height_ratio, cnts): +def is_likely_figure(min_area, max_area, max_width_to_height_ratio, contours): return ( - is_not_too_large(cnts, max_area) - and is_large_enough(cnts, min_area) - and has_acceptable_format(cnts, max_width_to_height_ratio) + is_not_too_large(contours, max_area) + and is_large_enough(contours, min_area) + and has_acceptable_format(contours, max_width_to_height_ratio) ) diff --git a/cv_analysis/table_parsing.py b/cv_analysis/table_parsing.py index 5205585..7d8b166 100644 --- a/cv_analysis/table_parsing.py +++ b/cv_analysis/table_parsing.py @@ -1,14 +1,10 @@ -from functools import partial -from itertools import chain, starmap -from operator import attrgetter - import cv2 import numpy as np from funcy import lmap, lfilter from cv_analysis.layout_parsing import parse_layout -from cv_analysis.utils.postprocessing import remove_isolated # xywh_to_vecs, xywh_to_vec_rect, adjacent1d -from cv_analysis.utils.rectangle import Rectangle +from cv_analysis.utils.conversion import box_to_rectangle +from cv_analysis.utils.postprocessing import remove_isolated from cv_analysis.utils.visual_logging import vizlogger @@ -31,8 +27,7 @@ def apply_motion_blur(image: np.array, angle, size=80): size (int): kernel size; 80 found empirically to work well Returns: - np.array - + np.ndarray """ k = np.zeros((size, size), dtype=np.float32) vizlogger.debug(k, "tables08_blur_kernel1.png") @@ -55,10 +50,9 @@ def isolate_vertical_and_horizontal_components(img_bin): Args: img_bin (np.array): array corresponding to single binarized page image - bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables Returns: - np.array + np.ndarray """ line_min_width = 48 kernel_h = np.ones((1, line_min_width), np.uint8) @@ -90,10 +84,9 @@ def find_table_layout_boxes(image: np.array): def is_large_enough(box): (x, y, w, h) = box if w * h >= 100000: - return Rectangle.from_xywh(box) + return box_to_rectangle(box) layout_boxes = parse_layout(image) - a = lmap(is_large_enough, layout_boxes) return lmap(is_large_enough, layout_boxes) @@ -103,7 +96,7 @@ def preprocess(image: np.array): return ~image -def turn_connected_components_into_rects(image: np.array): +def turn_connected_components_into_rectangles(image: np.array): def is_large_enough(stat): x1, y1, w, h, area = stat return area > 2000 and w > 35 and h > 25 @@ -117,7 +110,7 @@ def turn_connected_components_into_rects(image: np.array): return [] -def parse_tables(image: np.array, show=False): +def parse_tables(image: np.array): """Runs the full table parsing process. Args: @@ -129,11 +122,8 @@ def parse_tables(image: np.array, show=False): image = preprocess(image) image = isolate_vertical_and_horizontal_components(image) - rects = turn_connected_components_into_rects(image) - #print(rects, "\n\n") - rects = list(map(Rectangle.from_xywh, rects)) - #print(rects, "\n\n") - rects = remove_isolated(rects) - #print(rects, "\n\n") - - return rects + boxes = turn_connected_components_into_rectangles(image) + rectangles = lmap(box_to_rectangle, boxes) + rectangles = remove_isolated(rectangles) + + return rectangles diff --git a/cv_analysis/utils/conversion.py b/cv_analysis/utils/conversion.py new file mode 100644 index 0000000..c655152 --- /dev/null +++ b/cv_analysis/utils/conversion.py @@ -0,0 +1,6 @@ +from cv_analysis.utils.rectangle import Rectangle + + +def box_to_rectangle(box): + x, y, w, h = box + return Rectangle(x, y, x + w, y + h) diff --git a/cv_analysis/utils/rectangle.py b/cv_analysis/utils/rectangle.py index e2a4ef2..da367d7 100644 --- a/cv_analysis/utils/rectangle.py +++ b/cv_analysis/utils/rectangle.py @@ -1,7 +1,6 @@ # See https://stackoverflow.com/a/33533514 from __future__ import annotations -from json import dumps from typing import Iterable, Union from funcy import identity @@ -21,9 +20,6 @@ class Rectangle: self.__x2 = nearest_valid(x2) self.__y2 = nearest_valid(y2) - self.__w = nearest_valid(x2 - x1) - self.__h = nearest_valid(y2 - y1) - @property def x1(self): return self.__x1 @@ -41,38 +37,30 @@ class Rectangle: return self.__y2 @property - def w(self): - return self.__w + def width(self): + return abs(self.x2 - self.x1) @property - def h(self): - return self.__h + def height(self): + return abs(self.y2 - self.y1) def __hash__(self): return hash((self.x1, self.y1, self.x2, self.y2)) - @classmethod - def from_xywh(cls, xywh: Iterable[Coord], discrete=True): - """Creates a rectangle from a point, width and height.""" - x1, y1, w, h = xywh - x2 = x1 + w - y2 = y1 + h - return cls(x1, y1, x2, y2, discrete=discrete) - - def intersection(self, other): - """Calculates the intersection of this and another rectangle.""" - return intersection(self, other) - def area(self): """Calculates the area of this rectangle.""" return area(self) + def intersection(self, other): + """Calculates the intersection of this and the given other rectangle.""" + return intersection(self, other) + def iou(self, other: Rectangle): - """Calculates the intersection over union of this and another rectangle.""" + """Calculates the intersection over union of this and the given other rectangle.""" return iou(self, other) def includes(self, other: Rectangle, tol=3): - """Checks if this rectangle contains another.""" + """Checks if this rectangle contains the given other.""" return contains(self, other, tol) def is_included(self, rectangles: Iterable[Rectangle]): @@ -80,5 +68,5 @@ class Rectangle: return is_contained(self, rectangles) def adjacent(self, other: Rectangle, tolerance=7): - """Checks if this rectangle is adjacent to another.""" + """Checks if this rectangle is adjacent to the given other.""" return adjacent(self, other, tolerance) diff --git a/cv_analysis/utils/utils.py b/cv_analysis/utils/utils.py index 0902045..e62769b 100644 --- a/cv_analysis/utils/utils.py +++ b/cv_analysis/utils/utils.py @@ -1,7 +1,7 @@ from __future__ import annotations -from numpy import generic import cv2 +from numpy import generic def copy_and_normalize_channels(image): diff --git a/test/unit_tests/figure_detection/figure_detection_test.py b/test/unit_tests/figure_detection/figure_detection_test.py index af0fbab..ce6c3dc 100644 --- a/test/unit_tests/figure_detection/figure_detection_test.py +++ b/test/unit_tests/figure_detection/figure_detection_test.py @@ -15,21 +15,20 @@ class TestFindPrimaryTextRegions: @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) def test_page_without_text_yields_figures(self, figure_detection_pipeline, page_with_images, image_size): - results = figure_detection_pipeline(page_with_images) - result_figures_size = map(lambda x: (x.w, x.h), results) + result_rectangles = figure_detection_pipeline(page_with_images) + result_figure_sizes = map(lambda r: (r.width, r.height), result_rectangles) - assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figures_size]) + assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figure_sizes]) @pytest.mark.parametrize("font_scale", [1, 1.5, 2]) @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX]) @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"])) @pytest.mark.parametrize("error_tolerance", [0.025]) def test_page_with_only_text_yields_no_figures(self, figure_detection_pipeline, page_with_text, error_tolerance): - results = figure_detection_pipeline(page_with_text) - - result_figures_area = sum(map(lambda x: (x.w * x.h), results)) + result_rectangles = figure_detection_pipeline(page_with_text) + result_figure_areas = sum(map(lambda r: (r.width * r.height), result_rectangles)) page_area = prod(page_with_text.shape) - error = result_figures_area / page_area + error = result_figure_areas / page_area assert error <= error_tolerance @@ -45,11 +44,11 @@ class TestFindPrimaryTextRegions: image_size, error_tolerance, ): - results = list(figure_detection_pipeline(page_with_images_and_text)) + result_rectangles = list(figure_detection_pipeline(page_with_images_and_text)) - result_figures_area = sum(map(lambda x: (x.w * x.h), results)) + result_figure_areas = sum(map(lambda r: (r.width * r.height), result_rectangles)) expected_figure_area = prod(image_size) - error = abs(result_figures_area - expected_figure_area) / expected_figure_area + error = abs(result_figure_areas - expected_figure_area) / expected_figure_area assert error <= error_tolerance diff --git a/test/unit_tests/table_parsing_test.py b/test/unit_tests/table_parsing_test.py index 33cd46a..720553a 100644 --- a/test/unit_tests/table_parsing_test.py +++ b/test/unit_tests/table_parsing_test.py @@ -2,7 +2,7 @@ from itertools import starmap import cv2 import pytest -from funcy import lmap, compose +from funcy import lmap, compose, zipdict from cv_analysis.table_parsing import parse_tables from cv_analysis.utils import lift @@ -30,11 +30,11 @@ def error_tolerance(line_thickness): def rectangle_to_dict(rectangle: Rectangle): - return {"x": rectangle.x1, "y": rectangle.y1, "width": rectangle.w, "height": rectangle.h} + return zipdict(["x", "y", "width", "height"], rectangle_to_xywh(rectangle)) def rectangle_to_xywh(rectangle: Rectangle): - return rectangle.x1, rectangle.y1, rectangle.w, rectangle.h + return rectangle.x1, rectangle.y1, abs(rectangle.x1 - rectangle.x2), abs(rectangle.y1 - rectangle.y2) @pytest.mark.parametrize("line_thickness", [1, 2, 3])