refactoring

2022-02-06 14:53:17 +01:00 · 2022-02-06 14:53:17 +01:00 · e652da1fa8
commit e652da1fa8
parent d9567da428
12 changed files with 197 additions and 190 deletions
--- a/vidocp/figure_detection.py
+++ b/vidocp/figure_detection.py
@@ -2,14 +2,11 @@ import cv2
 import numpy as np
 from pdf2image import pdf2image

-from vidocp.utils import (
-    show_mpl,
-    draw_rectangles,
-    remove_included,
-    detect_large_coherent_structures,
-    is_large_enough,
-    has_acceptable_format,
-)
+from vidocp.utils.detection import detect_large_coherent_structures
+from vidocp.utils.display import show_mpl
+from vidocp.utils.draw import draw_rectangles
+from vidocp.utils.post_processing import remove_included
+from vidocp.utils.filters import is_large_enough, has_acceptable_format
 from vidocp.utils.text import remove_primary_text_regions


--- a/vidocp/layout_parsing.py
+++ b/vidocp/layout_parsing.py
@@ -6,7 +6,9 @@ import cv2
 import numpy as np
 from pdf2image import pdf2image

-from vidocp.utils import draw_rectangles, show_mpl, remove_overlapping, remove_included, has_no_parent
+from vidocp.utils.display import show_mpl
+from vidocp.utils.draw import draw_rectangles
+from vidocp.utils.post_processing import remove_overlapping, remove_included, has_no_parent


 def is_likely_segment(rect, min_area=100):
--- a/vidocp/redaction_detection.py
+++ b/vidocp/redaction_detection.py
@@ -5,7 +5,9 @@ import numpy as np
 import pdf2image
 from iteration_utilities import starfilter, first

-from vidocp.utils import show_mpl, draw_contours, is_large_enough, is_filled, is_boxy
+from vidocp.utils.display import show_mpl
+from vidocp.utils.draw import draw_contours
+from vidocp.utils.filters import is_large_enough, is_filled, is_boxy


 def is_likely_redaction(contour, hierarchy, min_area):
--- a/vidocp/table_parsing.py
+++ b/vidocp/table_parsing.py
@@ -2,7 +2,8 @@ import cv2
 import numpy as np
 from pdf2image import pdf2image

-from vidocp.utils import draw_stats, show_mpl
+from vidocp.utils.display import show_mpl
+from vidocp.utils.draw import draw_stats


 def add_external_contours(image, img):
--- a/vidocp/utils/__init__.py
+++ b/vidocp/utils/__init__.py
@@ -1 +1 @@
-from .utils import *
+from .utils import *
--- a/vidocp/utils/detection.py
+++ b/vidocp/utils/detection.py
@@ -0,0 +1,23 @@
+import cv2
+import numpy as np
+
+
+def detect_large_coherent_structures(image: np.array):
+    """Detects large coherent structures on an image.
+
+    References:
+         https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
+    """
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+    thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY)[1]
+
+    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5))
+    dilate = cv2.dilate(~thresh, dilate_kernel, iterations=4)
+
+    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
+    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)
+
+    cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    return cnts
--- a/vidocp/utils/display.py
+++ b/vidocp/utils/display.py
@@ -0,0 +1,16 @@
+import cv2
+from matplotlib import pyplot as plt
+
+
+def show_mpl(image):
+
+    fig, ax = plt.subplots(1, 1)
+    fig.set_size_inches(20, 20)
+    ax.imshow(image)
+    plt.show()
+
+
+def show_cv2(image):
+
+    cv2.imshow("", image)
+    cv2.waitKey(0)
--- a/vidocp/utils/draw.py
+++ b/vidocp/utils/draw.py
@@ -0,0 +1,56 @@
+import cv2
+
+from vidocp.utils import copy_and_normalize_channels
+
+
+def draw_contours(image, contours):
+
+    image = copy_and_normalize_channels(image)
+
+    for cont in contours:
+        cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
+
+    return image
+
+
+def draw_rectangles(image, rectangles, color=None):
+
+    image = copy_and_normalize_channels(image)
+
+    if not color:
+        color = (0, 255, 0)
+
+    for rect in rectangles:
+        x, y, w, h = rect
+        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
+
+    return image
+
+
+def draw_stats(image, stats, annotate=False):
+
+    image = copy_and_normalize_channels(image)
+
+    keys = ["x", "y", "w", "h"]
+
+    def annotate_stat(x, y, w, h):
+
+        for i, (s, v) in enumerate(zip(keys, [x, y, w, h])):
+            anno = f"{s} = {v}"
+            xann = int(x + 5)
+            yann = int(y + h - (20 * (i + 1)))
+            cv2.putText(image, anno, (xann, yann), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
+
+    def draw_stat(stat):
+
+        x, y, w, h, area = stat
+
+        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
+
+        if annotate:
+            annotate_stat(x, y, w, h)
+
+    for stat in stats[2:]:
+        draw_stat(stat)
+
+    return image
--- a/vidocp/utils/filters.py
+++ b/vidocp/utils/filters.py
@@ -0,0 +1,25 @@
+import cv2
+
+
+def is_large_enough(cont, min_area):
+    return cv2.contourArea(cont, False) > min_area
+
+
+def has_acceptable_format(cont, max_width_to_height_ratio):
+    _, _, w, h = cv2.boundingRect(cont)
+    return max_width_to_height_ratio >= w / h >= (1 / max_width_to_height_ratio)
+
+
+def is_filled(hierarchy):
+    """Checks whether a hierarchy is filled.
+
+    References:
+        https://stackoverflow.com/questions/60095520/how-to-distinguish-filled-circle-contour-and-unfilled-circle-contour-in-opencv
+    """
+    return hierarchy[3] <= 0 and hierarchy[2] == -1
+
+
+def is_boxy(contour):
+    epsilon = 0.01 * cv2.arcLength(contour, True)
+    approx = cv2.approxPolyDP(contour, epsilon, True)
+    return len(approx) <= 10
--- a/vidocp/utils/post_processing.py
+++ b/vidocp/utils/post_processing.py
@@ -0,0 +1,62 @@
+from collections import namedtuple
+from functools import partial
+
+
+def remove_overlapping(rectangles):
+    def overlap(a, b):
+        return compute_intersection(a, b) > 0
+
+    def does_not_overlap(rect, rectangles):
+        return not any(overlap(rect, r2) for r2 in rectangles if not rect == r2)
+
+    rectangles = list(map(xywh_to_vec_rect, rectangles))
+    rectangles = filter(partial(does_not_overlap, rectangles=rectangles), rectangles)
+    rectangles = map(vec_rect_to_xywh, rectangles)
+    return rectangles
+
+
+def remove_included(rectangles):
+    def included(a, b):
+        return b.xmin >= a.xmin and b.ymin >= a.ymin and b.xmax <= a.xmax and b.ymax <= a.ymax
+
+    def is_not_included(rect, rectangles):
+        return not any(included(r2, rect) for r2 in rectangles if not rect == r2)
+
+    rectangles = list(map(xywh_to_vec_rect, rectangles))
+    rectangles = filter(partial(is_not_included, rectangles=rectangles), rectangles)
+    rectangles = map(vec_rect_to_xywh, rectangles)
+    return rectangles
+
+
+Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax")
+
+
+def make_box(x1, y1, x2, y2):
+    keys = "x1", "y1", "x2", "y2"
+    return dict(zip(keys, [x1, y1, x2, y2]))
+
+
+def compute_intersection(a, b):
+
+    dx = min(a.xmax, b.xmax) - max(a.xmin, b.xmin)
+    dy = min(a.ymax, b.ymax) - max(a.ymin, b.ymin)
+
+    return dx * dy if (dx >= 0) and (dy >= 0) else 0
+
+
+def has_no_parent(hierarchy):
+    return hierarchy[-1] <= 0
+
+
+def xywh_to_vec_rect(rect):
+    x1, y1, w, h = rect
+    x2 = x1 + w
+    y2 = y1 + h
+    return Rectangle(x1, y1, x2, y2)
+
+
+def vec_rect_to_xywh(rect):
+    x, y, x2, y2 = rect
+    w = x2 - x
+    h = y2 - y
+    return x, y, w, h
--- a/vidocp/utils/text.py
+++ b/vidocp/utils/text.py
@@ -54,4 +54,4 @@ def find_primary_text_regions(image):

    cnts = filter(is_likely_primary_text_segments, cnts)

-    return cnts
+    return cnts
--- a/vidocp/utils/utils.py
+++ b/vidocp/utils/utils.py
@@ -1,23 +1,4 @@
-from collections import namedtuple
-from functools import partial
-
 import cv2
-import numpy as np
-from matplotlib import pyplot as plt
-
-
-def show_mpl(image):
-
-    fig, ax = plt.subplots(1, 1)
-    fig.set_size_inches(20, 20)
-    ax.imshow(image)
-    plt.show()
-
-
-def show_cv2(image):
-
-    cv2.imshow("", image)
-    cv2.waitKey(0)


 def copy_and_normalize_channels(image):
@@ -29,161 +10,3 @@ def copy_and_normalize_channels(image):
        pass

    return image
-
-
-def draw_contours(image, contours):
-
-    image = copy_and_normalize_channels(image)
-
-    for cont in contours:
-        cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
-
-    return image
-
-
-def draw_rectangles(image, rectangles, color=None):
-
-    image = copy_and_normalize_channels(image)
-
-    if not color:
-        color = (0, 255, 0)
-
-    for rect in rectangles:
-        x, y, w, h = rect
-        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
-
-    return image
-
-
-def draw_stats(image, stats, annotate=False):
-
-    image = copy_and_normalize_channels(image)
-
-    keys = ["x", "y", "w", "h"]
-
-    def annotate_stat(x, y, w, h):
-
-        for i, (s, v) in enumerate(zip(keys, [x, y, w, h])):
-            anno = f"{s} = {v}"
-            xann = int(x + 5)
-            yann = int(y + h - (20 * (i + 1)))
-            cv2.putText(image, anno, (xann, yann), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-
-    def draw_stat(stat):
-
-        x, y, w, h, area = stat
-
-        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
-
-        if annotate:
-            annotate_stat(x, y, w, h)
-
-    for stat in stats[2:]:
-        draw_stat(stat)
-
-    return image
-
-
-def remove_overlapping(rectangles):
-    def overlap(a, b):
-        return compute_intersection(a, b) > 0
-
-    def does_not_overlap(rect, rectangles):
-        return not any(overlap(rect, r2) for r2 in rectangles if not rect == r2)
-
-    rectangles = list(map(xywh_to_vec_rect, rectangles))
-    rectangles = filter(partial(does_not_overlap, rectangles=rectangles), rectangles)
-    rectangles = map(vec_rect_to_xywh, rectangles)
-    return rectangles
-
-
-def remove_included(rectangles):
-    def included(a, b):
-        return b.xmin >= a.xmin and b.ymin >= a.ymin and b.xmax <= a.xmax and b.ymax <= a.ymax
-
-    def is_not_included(rect, rectangles):
-        return not any(included(r2, rect) for r2 in rectangles if not rect == r2)
-
-    rectangles = list(map(xywh_to_vec_rect, rectangles))
-    rectangles = filter(partial(is_not_included, rectangles=rectangles), rectangles)
-    rectangles = map(vec_rect_to_xywh, rectangles)
-    return rectangles
-
-
-Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax")
-
-
-def make_box(x1, y1, x2, y2):
-    keys = "x1", "y1", "x2", "y2"
-    return dict(zip(keys, [x1, y1, x2, y2]))
-
-
-def compute_intersection(a, b):
-
-    dx = min(a.xmax, b.xmax) - max(a.xmin, b.xmin)
-    dy = min(a.ymax, b.ymax) - max(a.ymin, b.ymin)
-
-    return dx * dy if (dx >= 0) and (dy >= 0) else 0
-
-
-def has_no_parent(hierarchy):
-    return hierarchy[-1] <= 0
-
-
-def xywh_to_vec_rect(rect):
-    x1, y1, w, h = rect
-    x2 = x1 + w
-    y2 = y1 + h
-    return Rectangle(x1, y1, x2, y2)
-
-
-def vec_rect_to_xywh(rect):
-    x, y, x2, y2 = rect
-    w = x2 - x
-    h = y2 - y
-    return x, y, w, h
-
-
-def detect_large_coherent_structures(image: np.array):
-    """Detects large coherent structures on an image.
-
-    References:
-         https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
-    """
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-
-    thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY)[1]
-
-    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5))
-    dilate = cv2.dilate(~thresh, dilate_kernel, iterations=4)
-
-    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
-    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)
-
-    cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-    return cnts
-
-
-def is_large_enough(cont, min_area):
-    return cv2.contourArea(cont, False) > min_area
-
-
-def has_acceptable_format(cont, max_width_to_height_ratio):
-    _, _, w, h = cv2.boundingRect(cont)
-    return max_width_to_height_ratio >= w / h >= (1 / max_width_to_height_ratio)
-
-
-def is_filled(hierarchy):
-    """Checks whether a hierarchy is filled.
-
-    References:
-        https://stackoverflow.com/questions/60095520/how-to-distinguish-filled-circle-contour-and-unfilled-circle-contour-in-opencv
-    """
-    return hierarchy[3] <= 0 and hierarchy[2] == -1
-
-
-def is_boxy(contour):
-    epsilon = 0.01 * cv2.arcLength(contour, True)
-    approx = cv2.approxPolyDP(contour, epsilon, True)
-    return len(approx) <= 10