Compare commits: master...refactorin (164 commits)
@@ -1,17 +1,17 @@
 from functools import partial
 
-import cv2
 import numpy as np
+from funcy import lmap
 
 from cv_analysis.figure_detection.figures import detect_large_coherent_structures
 from cv_analysis.figure_detection.text import remove_primary_text_regions
+from cv_analysis.utils.conversion import contour_to_rectangle
 from cv_analysis.utils.filters import (
     is_large_enough,
     has_acceptable_format,
-    is_not_too_large,
+    is_small_enough,
 )
 from cv_analysis.utils.postprocessing import remove_included
-from cv_analysis.utils.structures import Rectangle
 
 
 def detect_figures(image: np.array):
@@ -21,19 +21,18 @@ def detect_figures(image: np.array):
     figure_filter = partial(is_likely_figure, min_area, max_area, max_width_to_height_ratio)
 
     image = remove_primary_text_regions(image)
-    cnts = detect_large_coherent_structures(image)
-    cnts = filter(figure_filter, cnts)
+    contours = detect_large_coherent_structures(image)
+    contours = filter(figure_filter, contours)
 
-    rects = map(cv2.boundingRect, cnts)
-    rects = map(Rectangle.from_xywh, rects)
-    rects = remove_included(rects)
+    rectangles = lmap(contour_to_rectangle, contours)
+    rectangles = remove_included(rectangles)
 
-    return rects
+    return rectangles
 
 
-def is_likely_figure(min_area, max_area, max_width_to_height_ratio, cnts):
+def is_likely_figure(min_area, max_area, max_width_to_height_ratio, contours):
     return (
-        is_not_too_large(cnts, max_area)
-        and is_large_enough(cnts, min_area)
-        and has_acceptable_format(cnts, max_width_to_height_ratio)
+        is_small_enough(contours, max_area)
+        and is_large_enough(contours, min_area)
+        and has_acceptable_format(contours, max_width_to_height_ratio)
     )
@@ -1,25 +1,33 @@
 import cv2
 import numpy as np
 
+from cv_analysis.utils.common import find_contours_and_hierarchies
+
 
 def detect_large_coherent_structures(image: np.array):
-    """Detects large coherent structures on an image.
+    """Detects large coherent structures in an image.
     Expects an image with binary color space (e.g. threshold applied).
 
+    Args:
+        image (np.array): Image to look for large coherent structures in.
+
     Returns:
-        contours
+        list: List of contours.
 
     References:
         https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
     """
     assert len(image.shape) == 2
 
+    # FIXME: Parameterize via factory
     dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5))
+    # FIXME: Parameterize via factory
     dilate = cv2.dilate(image, dilate_kernel, iterations=4)
+    # FIXME: Parameterize via factory
     close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
-    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)
+    # FIXME: Parameterize via factory
+    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)  # TODO: Tweak iterations
 
-    cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours, _ = find_contours_and_hierarchies(close)
 
-    return cnts
+    return contours
@@ -1,5 +1,7 @@
 import cv2
 
+from cv_analysis.utils.common import normalize_to_gray_scale
+
 
 def remove_primary_text_regions(image):
     """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs.
@@ -35,6 +37,7 @@ def remove_primary_text_regions(image):
 
 def apply_threshold_to_image(image):
     """Converts an image to black and white."""
+    image = normalize_to_gray_scale(image)
     image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
     return cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
 
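The binarization step above relies on `cv2.threshold` with `THRESH_BINARY_INV + THRESH_OTSU`, where the fixed threshold value is effectively replaced by the Otsu estimate. This is not the repository's code, just a minimal sketch of that cv2 call on a synthetic grayscale page, assuming only `cv2` and `numpy`:

```python
import cv2
import numpy as np

# Synthetic page: white background (255) with a dark block standing in for text.
page = np.full((100, 100), 255, dtype=np.uint8)
page[40:60, 10:90] = 30

# cv2.threshold returns (threshold_value, binary_image); the diff keeps index [1].
_, binary = cv2.threshold(page, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
print(binary.max(), binary.min())  # 255 0: dark content becomes foreground (255)
```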
@@ -1,87 +1,80 @@
-import itertools
-from itertools import compress
-from itertools import starmap
-from operator import __and__
+from functools import partial
+from typing import Iterable, List
 
 import cv2
 import numpy as np
+from funcy import compose, rcompose, lkeep
 
+from cv_analysis.utils import lstarkeep
-from cv_analysis.utils.connect_rects import connect_related_rects2
-from cv_analysis.utils.structures import Rectangle
-from cv_analysis.utils.postprocessing import (
-    remove_overlapping,
-    remove_included,
-    has_no_parent,
+from cv_analysis.utils.common import (
+    find_contours_and_hierarchies,
+    dilate_page_components,
+    normalize_to_gray_scale,
+    threshold_image,
+    invert_image,
+    fill_rectangles,
 )
-from cv_analysis.utils.visual_logging import vizlogger
-
-#could be dynamic parameter is the scan is noisy
-def is_likely_segment(rect, min_area=100):
-    return cv2.contourArea(rect, False) > min_area
+from cv_analysis.utils.conversion import contour_to_rectangle
+from cv_analysis.utils.merging import merge_related_rectangles
+from cv_analysis.utils.postprocessing import remove_included, has_no_parent
+from cv_analysis.utils.rectangle import Rectangle
 
 
-def find_segments(image):
-    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    mask1 = map(is_likely_segment, contours)
-    mask2 = map(has_no_parent, hierarchies[0])
-    mask = starmap(__and__, zip(mask1, mask2))
-    contours = compress(contours, mask)
+def parse_layout(image: np.array) -> List[Rectangle]:
+    """Parse the layout of a page.
 
-    rectangles = (cv2.boundingRect(c) for c in contours)
+    Args:
+        image: Image of the page.
 
+    Returns:
+        List of rectangles representing the layout of the page as identified page elements.
+    """
+    rectangles = rcompose(
+        find_segments,
+        remove_included,
+        merge_related_rectangles,
+        remove_included,
+    )(image)
 
     return rectangles
 
 
-def dilate_page_components(image):
-    #if text is detected in words make kernel bigger
-    image = cv2.GaussianBlur(image, (7, 7), 0)
-    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    return cv2.dilate(thresh, kernel, iterations=4)
+def find_segments(image: np.ndarray) -> List[Rectangle]:
+    """Find segments in a page. Segments are structural elements of a page, such as text blocks, tables, etc."""
+    rectangles = rcompose(
+        prepare_for_initial_detection,
+        __find_segments,
+        partial(prepare_for_meta_detection, image.copy()),
+        __find_segments,
+    )(image)
 
+    return rectangles
 
 
-def fill_in_component_area(image, rect):
-    x, y, w, h = rect
-    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
-    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
-    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
-    return ~image
+def prepare_for_initial_detection(image: np.ndarray) -> np.ndarray:
+    return compose(dilate_page_components, normalize_to_gray_scale)(image)
 
 
+def __find_segments(image: np.ndarray) -> List[Rectangle]:
+    def to_rectangle_if_valid(contour, hierarchy):
+        return contour_to_rectangle(contour) if is_likely_segment(contour) and has_no_parent(hierarchy) else None
 
-def parse_layout(image: np.array):
-    image = image.copy()
-    image_ = image.copy()
+    rectangles = lstarkeep(to_rectangle_if_valid, zip(*find_contours_and_hierarchies(image)))
 
-    if len(image_.shape) > 2:
-        image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)
+    return rectangles
 
-    dilate = dilate_page_components(image_)
-    # show_mpl(dilate)
 
-    rects = list(find_segments(dilate))
+def prepare_for_meta_detection(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
+    image = rcompose(
+        fill_rectangles,
+        threshold_image,
+        invert_image,
+        normalize_to_gray_scale,
+    )(image, rectangles)
 
-    # -> Run meta detection on the previous detections TODO: refactor
-    for rect in rects:
-        x, y, w, h = rect
-        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
-        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
-    # show_mpl(image)
-    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
-    image = ~image
-    # show_mpl(image)
-    if len(image.shape) > 2:
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    return image
 
-    rects = find_segments(image)
-    # <- End of meta detection
-    rects = list(map(Rectangle.from_xywh, rects))
-    rects = remove_included(rects)
 
-    rects = map(lambda r: r.xywh(), rects)
-    rects = connect_related_rects2(rects)
-    rects = list(map(Rectangle.from_xywh, rects))
-    rects = remove_included(rects)
+def is_likely_segment(rectangle: Rectangle, min_area: float = 100) -> bool:
+    # FIXME: Parameterize via factory
+    return cv2.contourArea(rectangle, False) > min_area
 
-    return rects
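The new `parse_layout` and `find_segments` are built with `funcy.rcompose`, which chains functions left to right (the first-listed function runs first). A minimal sketch of that behaviour, not taken from the repository:

```python
from funcy import rcompose

# rcompose(f, g)(x) == g(f(x)), so parse_layout above is equivalent to
# remove_included(merge_related_rectangles(remove_included(find_segments(image)))).
pipeline = rcompose(lambda x: x + 1, lambda x: x * 2)
assert pipeline(3) == 8  # (3 + 1) * 2
```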
@@ -5,5 +5,8 @@ from pathlib import Path
 MODULE_PATH = Path(__file__).resolve().parents[0]
 PACKAGE_ROOT_PATH = MODULE_PATH.parents[0]
 REPO_ROOT_PATH = PACKAGE_ROOT_PATH
 
 TEST_DIR_PATH = REPO_ROOT_PATH / "test"
-TEST_DATA_DVC = TEST_DIR_PATH / "test_data.dvc"
+TEST_DATA_DVC = TEST_DIR_PATH / "test_data.dvc"  # TODO: remove once new tests are in place
+TEST_DATA_DIR = TEST_DIR_PATH / "data"
+TEST_PAGE_TEXTURES_DIR = TEST_DATA_DIR / "paper"
@@ -5,7 +5,7 @@ import numpy as np
 from iteration_utilities import starfilter, first
 
 from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.visual_logger import vizlogger
 
 
 def is_likely_redaction(contour, hierarchy, min_area):
@@ -5,34 +5,29 @@ from funcy import lmap, flatten
 
 from cv_analysis.figure_detection.figure_detection import detect_figures
 from cv_analysis.table_parsing import parse_tables
-from cv_analysis.utils.structures import Rectangle
+from cv_analysis.utils.rectangle import Rectangle
 from pdf2img.conversion import convert_pages_to_images
 from pdf2img.default_objects.image import ImagePlus, ImageInfo
 from pdf2img.default_objects.rectangle import RectanglePlus
 
 
-def get_analysis_pipeline(operation, table_parsing_skip_pages_without_images):
-    if operation == "table":
-        return make_analysis_pipeline(
-            parse_tables,
-            table_parsing_formatter,
-            dpi=200,
-            skip_pages_without_images=table_parsing_skip_pages_without_images,
-        )
-    elif operation == "figure":
-        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200)
+def make_analysis_pipeline_for_element_type(segment_type, **kwargs):
+    if segment_type == "table":
+        return make_analysis_pipeline(parse_tables, table_parsing_formatter, dpi=200, **kwargs)
+    elif segment_type == "figure":
+        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200, **kwargs)
     else:
-        raise
+        raise ValueError(f"Unknown segment type {segment_type}.")
 
 
 def make_analysis_pipeline(analysis_fn, formatter, dpi, skip_pages_without_images=False):
-    def analyse_pipeline(pdf: bytes, index=None):
+    def analysis_pipeline(pdf: bytes, index=None):
         def parse_page(page: ImagePlus):
             image = page.asarray()
-            rects = analysis_fn(image)
-            if not rects:
+            rectangles = analysis_fn(image)
+            if not rectangles:
                 return
-            infos = formatter(rects, page, dpi)
+            infos = formatter(rectangles, page, dpi)
             return infos
 
         pages = convert_pages_to_images(pdf, index=index, dpi=dpi, skip_pages_without_images=skip_pages_without_images)
@@ -40,22 +35,26 @@ def make_analysis_pipeline(analysis_fn, formatter, dpi, skip_pages_without_image
 
         yield from flatten(filter(truth, results))
 
-    return analyse_pipeline
+    return analysis_pipeline
 
 
-def table_parsing_formatter(rects, page: ImagePlus, dpi):
-    def format_rect(rect: Rectangle):
-        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
-        return rect_plus.asdict(derotate=True)
+def table_parsing_formatter(rectangles, page: ImagePlus, dpi):
+    def format_rectangle(rectangle: Rectangle):
+        rectangle_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
+        return rectangle_plus.asdict(derotate=True)
 
-    bboxes = lmap(format_rect, rects)
+    bboxes = lmap(format_rectangle, rectangles)
 
     return {"pageInfo": page.asdict(natural_index=True), "tableCells": bboxes}
 
 
-def figure_detection_formatter(rects, page, dpi):
-    def format_rect(rect: Rectangle):
-        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
+def figure_detection_formatter(rectangles, page, dpi):
+    def format_rectangle(rectangle: Rectangle):
+        rect_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
         return asdict(ImageInfo(page.info, rect_plus.asbbox(derotate=False), rect_plus.alpha))
 
-    return lmap(format_rect, rects)
+    return lmap(format_rectangle, rectangles)
+
+
+def rectangle_to_xyxy(rectangle: Rectangle):
+    return rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y2
@@ -1,15 +1,11 @@
-from functools import partial
-from itertools import chain, starmap
-from operator import attrgetter
-
 import cv2
 import numpy as np
 from funcy import lmap, lfilter
 
 from cv_analysis.layout_parsing import parse_layout
-from cv_analysis.utils.postprocessing import remove_isolated  # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
-from cv_analysis.utils.structures import Rectangle
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.conversion import box_to_rectangle
+from cv_analysis.utils.postprocessing import remove_isolated
+from cv_analysis.utils.visual_logger import vizlogger
 
 
 def add_external_contours(image, image_h_w_lines_only):
@@ -31,8 +27,7 @@ def apply_motion_blur(image: np.array, angle, size=80):
         size (int): kernel size; 80 found empirically to work well
 
     Returns:
-        np.array
-
+        np.ndarray
     """
     k = np.zeros((size, size), dtype=np.float32)
     vizlogger.debug(k, "tables08_blur_kernel1.png")
@@ -55,10 +50,9 @@ def isolate_vertical_and_horizontal_components(img_bin):
 
     Args:
        img_bin (np.array): array corresponding to single binarized page image
-       bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables
 
    Returns:
-       np.array
+       np.ndarray
    """
    line_min_width = 48
    kernel_h = np.ones((1, line_min_width), np.uint8)
@@ -90,10 +84,9 @@ def find_table_layout_boxes(image: np.array):
    def is_large_enough(box):
        (x, y, w, h) = box
        if w * h >= 100000:
-            return Rectangle.from_xywh(box)
+            return box_to_rectangle(box)
 
    layout_boxes = parse_layout(image)
-    a = lmap(is_large_enough, layout_boxes)
    return lmap(is_large_enough, layout_boxes)
 
 
@@ -103,7 +96,7 @@ def preprocess(image: np.array):
    return ~image
 
 
-def turn_connected_components_into_rects(image: np.array):
+def turn_connected_components_into_rectangles(image: np.array):
    def is_large_enough(stat):
        x1, y1, w, h, area = stat
        return area > 2000 and w > 35 and h > 25
@@ -117,7 +110,7 @@ def turn_connected_components_into_rects(image: np.array):
        return []
 
 
-def parse_tables(image: np.array, show=False):
+def parse_tables(image: np.array):
    """Runs the full table parsing process.
 
    Args:
@@ -129,11 +122,8 @@ def parse_tables(image: np.array, show=False):
 
    image = preprocess(image)
    image = isolate_vertical_and_horizontal_components(image)
-    rects = turn_connected_components_into_rects(image)
-    #print(rects, "\n\n")
-    rects = list(map(Rectangle.from_xywh, rects))
-    #print(rects, "\n\n")
-    rects = remove_isolated(rects)
-    #print(rects, "\n\n")
-
-    return rects
+    boxes = turn_connected_components_into_rectangles(image)
+    rectangles = lmap(box_to_rectangle, boxes)
+    rectangles = remove_isolated(rectangles)
+
+    return rectangles
cv_analysis/utils/common.py (new file)
@@ -0,0 +1,51 @@
from functools import reduce
from typing import Iterable

import cv2
import numpy as np
from funcy import first

from cv_analysis.utils.rectangle import Rectangle


def find_contours_and_hierarchies(image):
    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours, first(hierarchies) if hierarchies is not None else None


def dilate_page_components(image: np.ndarray) -> np.ndarray:
    # FIXME: Parameterize via factory
    image = cv2.GaussianBlur(image, (7, 7), 0)
    # FIXME: Parameterize via factory
    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # FIXME: Parameterize via factory
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    # FIXME: Parameterize via factory
    dilate = cv2.dilate(thresh, kernel, iterations=4)
    return dilate


def normalize_to_gray_scale(image: np.ndarray) -> np.ndarray:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
    return image


def threshold_image(image: np.ndarray) -> np.ndarray:
    # FIXME: Parameterize via factory
    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
    return image


def invert_image(image: np.ndarray):
    return ~image


def fill_rectangles(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
    image = reduce(fill_in_component_area, rectangles, image)
    return image


def fill_in_component_area(image: np.ndarray, rectangle: Rectangle) -> np.ndarray:
    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (0, 0, 0), -1)
    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (255, 255, 255), 7)
    return image
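`find_contours_and_hierarchies` wraps `cv2.findContours` and unwraps the extra nesting level of the hierarchy array that `has_no_parent(hierarchy)` later indexes. A hedged sketch of what gets normalized, assuming OpenCV 4.x and a synthetic binary image:

```python
import cv2
import numpy as np

image = np.zeros((50, 50), dtype=np.uint8)
cv2.rectangle(image, (10, 10), (40, 40), 255, -1)  # one filled blob

# cv2.findContours returns the hierarchy with shape (1, N, 4); taking the first
# element (as the wrapper above does) yields the per-contour rows [next, prev,
# first_child, parent].
contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
print(len(contours), hierarchies.shape)  # 1 (1, 1, 4)
print(hierarchies[0][0])                 # [-1 -1 -1 -1]: no parent
```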
@@ -1,120 +0,0 @@ (file deleted)
from itertools import combinations, starmap, product
from typing import Iterable


def is_near_enough(rect_pair, max_gap=14):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]

    return any([abs(x1 - (x2 + w2)) <= max_gap,
                abs(x2 - (x1 + w1)) <= max_gap,
                abs(y2 - (y1 + h1)) <= max_gap,
                abs(y1 - (y2 + h2)) <= max_gap])


def is_overlapping(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    dx = min(x1 + w1, x2 + w2) - max(x1, x2)
    dy = min(y1 + h1, y2 + h2) - max(y1, y2)
    return True if (dx >= 0) and (dy >= 0) else False


def is_on_same_line(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    return any([any([abs(y1 - y2) <= 10,
                     abs(y1 + h1 - (y2 + h2)) <= 10]),
                any([y2 <= y1 and y1 + h1 <= y2 + h2,
                     y1 <= y2 and y2 + h2 <= y1 + h1])])


def has_correct_position1(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    return any([any([abs(x1 - x2) <= 10,
                     abs(y1 - y2) <= 10,
                     abs(x1 + w1 - (x2 + w2)) <= 10,
                     abs(y1 + h1 - (y2 + h2)) <= 10]),
                any([y2 <= y1 and y1 + h1 <= y2 + h2,
                     y1 <= y2 and y2 + h2 <= y1 + h1,
                     x2 <= x1 and x1 + w1 <= x2 + w2,
                     x1 <= x2 and x2 + w2 <= x1 + w1])])


def is_related(rect_pair):
    return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping(
        rect_pair)


def fuse_rects(rect1, rect2):
    if rect1 == rect2:
        return rect1
    x1, y1, w1, h1 = rect1
    x2, y2, w2, h2 = rect2

    topleft = list(min(product([x1, x2], [y1, y2])))
    bottomright = list(max(product([x1 + w1, x2 + w2], [y1 + h1, y2 + h2])))

    w = [bottomright[0] - topleft[0]]
    h = [bottomright[1] - topleft[1]]
    return tuple(topleft + w + h)


def rects_not_the_same(r):
    return r[0] != r[1]


def find_related_rects(rects):
    rect_pairs = list(filter(is_related, combinations(rects, 2)))
    rect_pairs = list(filter(rects_not_the_same, rect_pairs))
    if not rect_pairs:
        return [], rects
    rel_rects = list(set([rect for pair in rect_pairs for rect in pair]))
    unrel_rects = [rect for rect in rects if rect not in rel_rects]
    return rect_pairs, unrel_rects


def connect_related_rects(rects):
    rects_to_connect, rects_new = find_related_rects(rects)

    while len(rects_to_connect) > 0:
        rects_fused = list(starmap(fuse_rects, rects_to_connect))
        rects_fused = list(dict.fromkeys(rects_fused))

        if len(rects_fused) == 1:
            rects_new += rects_fused
            rects_fused = []

        rects_to_connect, connected_rects = find_related_rects(rects_fused)
        rects_new += connected_rects

        if len(rects_to_connect) > 1 and len(set(rects_to_connect)) == 1:
            rects_new.append(rects_fused[0])
            rects_to_connect = []

    return rects_new


def connect_related_rects2(rects: Iterable[tuple]):
    rects = list(rects)
    current_idx = 0

    while True:
        if current_idx + 1 >= len(rects) or len(rects) <= 1:
            break
        merge_happened = False
        current_rect = rects.pop(current_idx)
        for idx, maybe_related_rect in enumerate(rects):
            if is_related((current_rect, maybe_related_rect)):
                current_rect = fuse_rects(current_rect, maybe_related_rect)
                rects.pop(idx)
                merge_happened = True
                break
        rects.insert(0, current_rect)
        if not merge_happened:
            current_idx += 1
        elif merge_happened:
            current_idx = 0

    return rects
cv_analysis/utils/conversion.py (new file)
@@ -0,0 +1,47 @@
import json
from typing import Sequence, Union

import cv2
import numpy as np
from PIL import Image

from cv_analysis.utils.rectangle import Rectangle

Image_t = Union[Image.Image, np.ndarray]


def contour_to_rectangle(contour):
    return box_to_rectangle(cv2.boundingRect(contour))


def box_to_rectangle(box: Sequence[int]) -> Rectangle:
    x, y, w, h = box
    return Rectangle(x, y, x + w, y + h)


def rectangle_to_box(rectangle: Rectangle) -> Sequence[int]:
    return [rectangle.x1, rectangle.y1, rectangle.width, rectangle.height]


class RectangleJSONEncoder(json.JSONEncoder):
    def __init__(self, *args, **kwargs):
        json.JSONEncoder.__init__(self, *args, **kwargs)
        self._replacement_map = {}

    def default(self, o):
        if isinstance(o, Rectangle):
            return {"x1": o.x1, "x2": o.x2, "y1": o.y1, "y2": o.y2}
        else:
            return json.JSONEncoder.default(self, o)

    def encode(self, o):
        result = json.JSONEncoder.encode(self, o)
        return result


def normalize_image_format_to_array(image: Image_t):
    return np.array(image).astype(np.uint8) if isinstance(image, Image.Image) else image


def normalize_image_format_to_pil(image: Image_t):
    return Image.fromarray(image.astype(np.uint8)) if isinstance(image, np.ndarray) else image
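`box_to_rectangle` converts cv2-style (x, y, w, h) boxes into corner coordinates, and `RectangleJSONEncoder` serializes those corners. A usage sketch, assuming the cv_analysis package from this branch is importable:

```python
import json

from cv_analysis.utils.conversion import box_to_rectangle, RectangleJSONEncoder

rectangle = box_to_rectangle((10, 20, 30, 40))      # (x, y, w, h) -> corner form
print(rectangle)                                    # Rectangle(10, 20, 40, 60)
print(json.dumps(rectangle, cls=RectangleJSONEncoder))
# {"x1": 10, "x2": 40, "y1": 20, "y2": 60}
```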
@@ -1,33 +1,51 @@
 import cv2
+import numpy as np
+from PIL import Image
+from PIL.Image import Image as Image_t
 from matplotlib import pyplot as plt
 
+from cv_analysis.utils.conversion import normalize_image_format_to_array
 
-def show_image_cv2(image, maxdim=700):
+
+def show_image(image, backend="mpl", **kwargs):
+    image = normalize_image_format_to_array(image)
+    if backend == "mpl":
+        show_image_mpl(image, **kwargs)
+    elif backend == "cv2":
+        show_image_cv2(image, **kwargs)
+    elif backend == "pil":
+        Image.fromarray(image).show()
+    else:
+        raise ValueError(f"Unknown backend: {backend}")
+
+
+def show_image_cv2(image, maxdim=700, **kwargs):
     h, w, c = image.shape
     maxhw = max(h, w)
     if maxhw > maxdim:
         ratio = maxdim / maxhw
         h = int(h * ratio)
         w = int(w * ratio)
-        img = cv2.resize(image, (h, w))
+
+    img = cv2.resize(image, (h, w))
     cv2.imshow("", img)
     cv2.waitKey(0)
     cv2.destroyAllWindows()
 
 
-def show_image_mpl(image):
+def show_image_mpl(image, **kwargs):
+    if isinstance(image, Image_t):
+        # noinspection PyTypeChecker
+        image = np.array(image)
+    # noinspection PyArgumentList
+    assert image.max() <= 255
     fig, ax = plt.subplots(1, 1)
     fig.set_size_inches(20, 20)
+    assert image.dtype == np.uint8
     ax.imshow(image, cmap="gray")
+    ax.title.set_text(kwargs.get("title", ""))
     plt.show()
 
 
-def show_image(image, backend="m"):
-    if backend.startswith("m"):
-        show_image_mpl(image)
-    else:
-        show_image_cv2(image)
-
-
 def save_image(image, path):
     cv2.imwrite(path, image)
@@ -1,19 +1,23 @@
+from typing import Union
+
 import cv2
+import numpy as np
+from PIL import Image
 
 from cv_analysis.utils import copy_and_normalize_channels
 
 
-def draw_contours(image, contours, color=None, annotate=False):
+def draw_contours(image, contours):
 
     image = copy_and_normalize_channels(image)
 
-    for cont in contours:
-        cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
+    for contour in contours:
+        cv2.drawContours(image, contour, -1, (0, 255, 0), 4)
 
     return image
 
 
-def draw_rectangles(image, rectangles, color=None, annotate=False):
+def draw_rectangles(image: Union[np.ndarray, Image.Image], rectangles, color=None, annotate=False, filled=False):
     def annotate_rect(x, y, w, h):
         cv2.putText(
             image,
@@ -21,18 +25,18 @@ def draw_rectangles(image, rectangles, color=None, annotate=False):
             (x + (w // 2) - 12, y + (h // 2) + 9),
             cv2.FONT_HERSHEY_SIMPLEX,
             1,
-            (0, 255, 0),
+            (0, 255, 0, 255),
             2,
         )
 
     image = copy_and_normalize_channels(image)
 
     if not color:
-        color = (0, 255, 0)
+        color = (0, 255, 0, 255)
 
     for rect in rectangles:
         x, y, w, h = rect
-        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
+        cv2.rectangle(image, (x, y), (x + w, y + h), color, -1 if filled else 1)
         if annotate:
             annotate_rect(x, y, w, h)
 
@@ -5,7 +5,7 @@ def is_large_enough(cont, min_area):
     return cv2.contourArea(cont, False) > min_area
 
 
-def is_not_too_large(cnt, max_area):
+def is_small_enough(cnt, max_area):
     return cv2.contourArea(cnt, False) < max_area
 
 
cv_analysis/utils/geometric.py (new file)
@@ -0,0 +1,13 @@
from cv_analysis.utils.rectangle import Rectangle


def is_square_like(box: Rectangle):
    return box.width / box.height > 0.5 and box.height / box.width > 0.5


def is_wide(box: Rectangle):
    return box.width / box.height > 1.5


def is_tall(box: Rectangle):
    return box.height / box.width > 1.5
cv_analysis/utils/image_operations.py (new file)
@@ -0,0 +1,115 @@
from typing import Tuple

import cv2 as cv
import numpy as np
from PIL import ImageOps, Image
from loguru import logger

from cv_analysis.utils.conversion import normalize_image_format_to_pil

Color = Tuple[int, int, int]


def blur(image: np.ndarray):
    return cv.blur(image, (3, 3))


def sharpen(image: np.ndarray):
    return cv.filter2D(image, -1, np.array([[-1, -1, -1], [-1, 6, -1], [-1, -1, -1]]))


def overlay(images, mode=np.sum):
    assert mode in [np.sum, np.max]
    images = np.stack(list(images))
    image = mode(images, axis=0)
    image = (image / image.max() * 255).astype(np.uint8)
    return image


def tint_image(src, color="#FFFFFF"):
    src.load()
    r, g, b, alpha = src.split()
    gray = ImageOps.grayscale(src)
    result = ImageOps.colorize(gray, (0, 0, 0), color)
    result.putalpha(alpha)
    return result


def color_shift_array(image: np.ndarray, color: Color):
    """Creates a 3-tensor from a 2-tensor by stacking the 2-tensor three times weighted by the color tuple."""
    assert image.ndim == 3
    assert image.shape[-1] == 3
    assert isinstance(color, tuple)
    assert max(color) <= 255
    assert image.max() <= 255

    color = np.array(color)
    weights = color / color.sum() / 10
    assert max(weights) <= 1

    colored = (image * weights).astype(np.uint8)

    assert colored.shape == image.shape

    return colored


def superimpose(
    base_image: Image,
    image_to_superimpose: Image,
    crop_to_content=True,
    pad=True,
) -> Image:
    """Superimposes an image with transparency onto another image.

    Args:
        base_image: The page image.
        image_to_superimpose: The texture image.
        crop_to_content: If True, the texture will be cropped to content (i.e. the bounding box of all non-transparent
            parts of the texture image).
        pad: If True, the texture will be padded to the size of the page.

    Returns:
        Image where the texture is superimposed onto the page.
    """
    base_image = normalize_image_format_to_pil(base_image)
    image_to_superimpose = normalize_image_format_to_pil(image_to_superimpose)

    if crop_to_content:
        image_to_superimpose = image_to_superimpose.crop(image_to_superimpose.getbbox())

    if base_image.size != image_to_superimpose.size:
        logger.trace(f"Size of page and texture do not match: {base_image.size} != {image_to_superimpose.size}")
        if pad:
            logger.trace(f"Padding texture before pasting to fit size {base_image.size}")
            image_to_superimpose = pad_image_to_size(image_to_superimpose, base_image.size)
        else:
            logger.trace(f"Resizing texture before pasting to fit size {base_image.size}")
            image_to_superimpose = image_to_superimpose.resize(base_image.size)

    assert base_image.size == image_to_superimpose.size
    assert image_to_superimpose.mode == "RGBA"

    base_image.paste(image_to_superimpose, (0, 0), image_to_superimpose)
    return base_image


def pad_image_to_size(image: Image, size: Tuple[int, int]) -> Image:
    """Pads an image to a given size."""
    if image.size == size:
        return image

    if image.size[0] > size[0] or image.size[1] > size[1]:
        raise ValueError(f"Image size {image.size} is larger than target size {size}.")

    padded = Image.new(image.mode, size, color=255)

    pasting_coords = compute_pasting_coordinates(image, padded)
    assert image.mode == "RGBA"
    padded.paste(image, pasting_coords)
    return padded


def compute_pasting_coordinates(smaller: Image, larger: Image.Image):
    """Computes the coordinates for centrally pasting a smaller image onto a larger image."""
    return abs(larger.width - smaller.width) // 2, abs(larger.height - smaller.height) // 2
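`pad_image_to_size` centers the smaller image on the padded canvas, and `compute_pasting_coordinates` is just half the size difference per axis. A small worked example of that arithmetic with plain PIL, offered only as an illustration:

```python
from PIL import Image

# Paste offset = (|W_large - W_small| // 2, |H_large - H_small| // 2).
small = Image.new("RGBA", (30, 20))
large = Image.new("RGBA", (100, 50))

offset = (abs(large.width - small.width) // 2, abs(large.height - small.height) // 2)
print(offset)  # (35, 15): the 30x20 image ends up centered on the 100x50 canvas
```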
cv_analysis/utils/input.py (new file)
@@ -0,0 +1,29 @@
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_analysis_input_file(path_or_bytes, first_page=1, last_page=None):

    assert first_page > 0, "Page numbers are 1-based."
    assert last_page is None or last_page >= first_page, "last_page must be greater than or equal to first_page."

    last_page = last_page or first_page

    if type(path_or_bytes) == str:
        if path_or_bytes.lower().endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(path_or_bytes)]
        elif path_or_bytes.lower().endswith(".pdf"):
            pages = pdf2image.convert_from_path(path_or_bytes, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes: .png, .jpg, .jpeg, .pdf")
    elif type(path_or_bytes) == bytes:
        pages = pdf2image.convert_from_bytes(path_or_bytes, first_page=first_page, last_page=last_page)
    elif type(path_or_bytes) in {list, ndarray}:
        return path_or_bytes

    pages = [preprocess_page_array(array(p)) for p in pages]

    return pages
cv_analysis/utils/merging.py (new file)
@@ -0,0 +1,54 @@
from functools import reduce
from itertools import combinations
from typing import List, Tuple, Set

from funcy import all

from cv_analysis.utils import until, make_merger_sentinel
from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import related


def merge_related_rectangles(rectangles: List[Rectangle]) -> List[Rectangle]:
    """Merges rectangles that are related to each other, iterating on partial merge results until no more mergers are
    possible."""
    assert isinstance(rectangles, list)
    no_new_merges = make_merger_sentinel()
    return until(no_new_merges, merge_rectangles_once, rectangles)


def merge_rectangles_once(rectangles: List[Rectangle]) -> List[Rectangle]:
    """Merges rectangles that are related to each other, but does not iterate on the results."""
    rectangles = set(rectangles)
    merged, used = reduce(merge_if_related, combinations(rectangles, 2), (set(), set()))

    return list(merged | rectangles - used)


T = Tuple[Set[Rectangle], Set[Rectangle]]
V = Tuple[Rectangle, Rectangle]


def merge_if_related(merged_and_used_so_far: T, rectangle_pair: V) -> T:
    """Merges two rectangles if they are related, otherwise returns the accumulator unchanged."""
    alpha, beta = rectangle_pair
    merged, used = merged_and_used_so_far

    def unused(*args) -> bool:
        return not used & {*args}

    if all(unused, (alpha, beta)) and related(alpha, beta):
        return merged | {bounding_rect(alpha, beta)}, used | {alpha, beta}

    else:
        return merged, used


def bounding_rect(alpha: Rectangle, beta: Rectangle) -> Rectangle:
    """Returns the smallest rectangle that contains both rectangles."""
    return Rectangle(
        min(alpha.x1, beta.x1),
        min(alpha.y1, beta.y1),
        max(alpha.x2, beta.x2),
        max(alpha.y2, beta.y2),
    )
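`merge_rectangles_once` folds over all unordered pairs, merging each related, not-yet-used pair into its bounding rectangle and keeping untouched rectangles as they are. The following is a standalone sketch of that reduce pattern on plain (x1, y1, x2, y2) tuples; the `overlaps` predicate here merely stands in for `related()` from cv_analysis.utils.spacial, which is not shown in this diff:

```python
from functools import reduce
from itertools import combinations

def overlaps(a, b):
    # Simple open-interval overlap test, used here only as a stand-in predicate.
    return a[0] < b[2] and b[0] < a[2] and a[1] < b[3] and b[1] < a[3]

def bounding(a, b):
    return (min(a[0], b[0]), min(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3]))

def merge_if_related(acc, pair):
    merged, used = acc
    a, b = pair
    if not ({a, b} & used) and overlaps(a, b):
        return merged | {bounding(a, b)}, used | {a, b}
    return merged, used

rects = {(0, 0, 10, 10), (5, 5, 20, 20), (50, 50, 60, 60)}
merged, used = reduce(merge_if_related, combinations(rects, 2), (set(), set()))
print(merged | (rects - used))  # {(0, 0, 20, 20), (50, 50, 60, 60)}
```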
cv_analysis/utils/metrics.py (new file)
@@ -0,0 +1,56 @@
from functools import reduce
from operator import itemgetter
from typing import Iterable

import numpy as np
from funcy import lmap, lpluck, first

from cv_analysis.utils import lift
from cv_analysis.utils.rectangle import Rectangle


def compute_document_score(result_dict, ground_truth_dicts):

    extract_cells = lambda dicts: lpluck("cells", dicts["pages"])

    cells_per_ground_truth_page, cells_per_result_page = map(extract_cells, (ground_truth_dicts, result_dict))
    cells_on_page_to_rectangles = lift(rectangle_from_dict)
    cells_on_pages_to_rectangles = lift(cells_on_page_to_rectangles)

    rectangles_per_ground_truth_page, rectangles_per_result_page = map(
        cells_on_pages_to_rectangles, (cells_per_ground_truth_page, cells_per_result_page)
    )

    scores = lmap(compute_page_iou, rectangles_per_result_page, rectangles_per_ground_truth_page)

    n_cells_per_page = np.array(lmap(len, cells_per_ground_truth_page))
    document_score = np.average(scores, weights=n_cells_per_page / n_cells_per_page.sum())

    return document_score


def rectangle_from_dict(d):
    x1, y1, w, h = itemgetter("x", "y", "width", "height")(d)
    return Rectangle(x1, y1, x1 + w, y1 + h)


def compute_page_iou(predicted_rectangles: Iterable[Rectangle], true_rectangles: Iterable[Rectangle]):
    def find_best_iou(sum_so_far_and_candidate_rectangles, true_rectangle):
        sum_so_far, predicted_rectangles = sum_so_far_and_candidate_rectangles
        best_match, best_iou = find_max_overlap(true_rectangle, predicted_rectangles)
        return sum_so_far + best_iou, predicted_rectangles - {best_match}

    predicted_rectangles = set(predicted_rectangles)
    true_rectangles = set(true_rectangles)

    iou_sum = first(reduce(find_best_iou, true_rectangles, (0, predicted_rectangles)))
    normalizing_factor = 1 / max(len(predicted_rectangles), len(true_rectangles))
    score = normalizing_factor * iou_sum

    return score


def find_max_overlap(rectangle: Rectangle, candidate_rectangles: Iterable[Rectangle]):
    best_candidate_rectangle = max(candidate_rectangles, key=rectangle.iou)
    iou = rectangle.iou(best_candidate_rectangle)
    return best_candidate_rectangle, iou
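`compute_page_iou` greedily matches each ground-truth cell to its best remaining prediction by intersection over union and normalizes by the larger of the two counts. A worked example of a single IoU term, written here from scratch as an illustration rather than taken from the repository:

```python
# Boxes (0, 0, 10, 10) and (5, 0, 15, 10):
# intersection = 5 * 10 = 50, union = 100 + 100 - 50 = 150, IoU = 50 / 150.
def iou(a, b):
    ix = max(0, min(a[2], b[2]) - max(a[0], b[0]))
    iy = max(0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = ix * iy
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return inter / (area(a) + area(b) - inter)

print(iou((0, 0, 10, 10), (5, 0, 15, 10)))  # 0.3333...
```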
cv_analysis/utils/morphing.py (new file)
@@ -0,0 +1,38 @@
from typing import Tuple

from PIL import Image
from loguru import logger

from cv_analysis.utils.image_operations import compute_pasting_coordinates
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.content_rectangle import ContentRectangle


def shrink_rectangle(rectangle: Rectangle, factor: float) -> Rectangle:
    x1, y1, x2, y2 = compute_scaled_coordinates(rectangle, (1 - factor))

    logger.trace(f"Shrinking {rectangle} by {factor} to ({x1}, {y1}, {x2}, {y2}).")

    assert x1 >= rectangle.x1
    assert y1 >= rectangle.y1
    assert x2 <= rectangle.x2
    assert y2 <= rectangle.y2

    shrunk_rectangle = Rectangle(x1, y1, x2, y2)

    if isinstance(rectangle, ContentRectangle):  # TODO: Refactor
        shrunk_rectangle = ContentRectangle(*shrunk_rectangle.coords, rectangle.content)

    return shrunk_rectangle


def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int, int, int, int]:
    # FIXME: Refactor: Using image to compute coordinates is not clean
    image = Image.new("RGBA", (rectangle.width, rectangle.height))
    scaled = image.resize((int(rectangle.width * factor), int(rectangle.height * factor)))

    x1, y1 = compute_pasting_coordinates(scaled, image)
    x1 = rectangle.x1 + x1
    y1 = rectangle.y1 + y1
    x2, y2 = x1 + scaled.width, y1 + scaled.height
    return x1, y1, x2, y2
@@ -1,27 +0,0 @@ (file deleted)
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_pdf(pdf, first_page=0, last_page=None):

    first_page += 1
    last_page = None if last_page is None else last_page + 1

    if type(pdf) == str:
        if pdf.lower().endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(pdf)]
        elif pdf.lower().endswith(".pdf"):
            pages = pdf2image.convert_from_path(pdf, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes:\n\t.png\n\t.jpg\n\t.jpeg\n\t.pdf")
    elif type(pdf) == bytes:
        pages = pdf2image.convert_from_bytes(pdf, first_page=first_page, last_page=last_page)
    elif type(pdf) in {list, ndarray}:
        return pdf

    pages = [preprocess_page_array(array(p)) for p in pages]

    return pages
@@ -1,15 +1,17 @@
-from collections import namedtuple
 from functools import partial
 from itertools import starmap, compress
-from typing import Iterable, List
-
-from cv_analysis.utils.structures import Rectangle
+from typing import Iterable, List, Sequence
+
+from funcy import lremove
+
+from cv_analysis.utils.rectangle import Rectangle


 def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
     def overlap(a: Rectangle, rect2: Rectangle) -> float:
         return a.intersection(rect2) > 0

-    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> list:
+    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> bool:
         return not any(overlap(rect, rect2) for rect2 in rectangles if not rect == rect2)

     rectangles = list(filter(partial(does_not_overlap, rectangles=rectangles), rectangles))
@@ -17,15 +19,28 @@ def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:


 def remove_included(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
-    keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
-    return keep
+    rectangles_to_keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
+    return rectangles_to_keep
+
+
+def remove_small(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]:
+    min_width = page_width * min_percentage
+    min_height = page_height * min_percentage
+
+    def small(box: Rectangle):
+        return box.width < min_width or box.height < min_height
+
+    return lremove(small, boxes)


 def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
     def is_connected(rect: Rectangle, rectangles: Iterable[Rectangle]):
         return any(rect.adjacent(rect2) for rect2 in rectangles if not rect == rect2)

-    rectangles = list(filter(partial(is_connected, rectangles=list(rectangles)), rectangles))
+    if not isinstance(rectangles, list):
+        rectangles = list(rectangles)
+
+    rectangles = list(filter(partial(is_connected, rectangles=rectangles), rectangles))
     return rectangles
@@ -42,9 +57,9 @@ def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]
     return rectangles


-def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> List[Rectangle]:
+def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted: bool = True) -> List[Rectangle]:
     return (__remove_isolated_unsorted if input_unsorted else __remove_isolated_sorted)(rectangles)


-def has_no_parent(hierarchy):
+def has_no_parent(hierarchy: Sequence[int]) -> bool:
     return hierarchy[-1] <= 0
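A minimal usage sketch of the new filters, assuming they live in cv_analysis.utils.postprocessing as the imports later in this compare suggest; the box coordinates and page size are made up for illustration.

# Sketch only: coordinates and page size are invented.
from cv_analysis.utils.postprocessing import remove_included, remove_overlapping, remove_small
from cv_analysis.utils.rectangle import Rectangle

boxes = [Rectangle(0, 0, 100, 100), Rectangle(10, 10, 50, 50), Rectangle(400, 400, 405, 405)]

boxes = remove_included(boxes)                                   # drops the 10..50 box nested inside the 0..100 box
boxes = remove_small(boxes, page_width=500, page_height=500)     # drops boxes smaller than 13% of the page edge
boxes = remove_overlapping(boxes)                                # keeps only boxes that overlap no other box
print(boxes)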
cv_analysis/utils/rectangle.py  (new file, 85 lines)
@@ -0,0 +1,85 @@
# See https://stackoverflow.com/a/33533514
from __future__ import annotations

from typing import Iterable, Union

from funcy import identity

from cv_analysis.utils.spacial import adjacent, contains, intersection, iou, area, is_contained

Coord = Union[int, float]


class Rectangle:
    def __init__(self, x1, y1, x2, y2, discrete=True):
        """Creates a rectangle from two points."""
        nearest_valid = int if discrete else identity

        self.__x1 = nearest_valid(x1)
        self.__y1 = nearest_valid(y1)
        self.__x2 = nearest_valid(x2)
        self.__y2 = nearest_valid(y2)

    def __repr__(self):
        return f"Rectangle({self.x1}, {self.y1}, {self.x2}, {self.y2})"

    @property
    def x1(self):
        return self.__x1

    @property
    def x2(self):
        return self.__x2

    @property
    def y1(self):
        return self.__y1

    @property
    def y2(self):
        return self.__y2

    @property
    def width(self):
        return abs(self.x2 - self.x1)

    @property
    def height(self):
        return abs(self.y2 - self.y1)

    @property
    def coords(self):
        return [self.x1, self.y1, self.x2, self.y2]

    def __hash__(self):
        return hash((self.x1, self.y1, self.x2, self.y2))

    def __iter__(self):
        yield self.x1
        yield self.y1
        yield self.width
        yield self.height

    def area(self):
        """Calculates the area of this rectangle."""
        return area(self)

    def intersection(self, other):
        """Calculates the intersection of this and the given other rectangle."""
        return intersection(self, other)

    def iou(self, other: Rectangle):
        """Calculates the intersection over union of this and the given other rectangle."""
        return iou(self, other)

    def includes(self, other: Rectangle, tol=3):
        """Checks if this rectangle contains the given other."""
        return contains(self, other, tol)

    def is_included(self, rectangles: Iterable[Rectangle]):
        """Checks if this rectangle is contained by any of the given rectangles."""
        return is_contained(self, rectangles)

    def adjacent(self, other: Rectangle, tolerance=7):
        """Checks if this rectangle is adjacent to the given other."""
        return adjacent(self, other, tolerance)
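A small usage sketch of the new value-object Rectangle; the coordinates are invented for illustration and the comments state expected results under the spatial helpers above.

from cv_analysis.utils.rectangle import Rectangle

a = Rectangle(0, 0, 100, 100)
b = Rectangle(90, 10, 150, 60)

print(a.width, a.height)      # 100 100
print(a.intersection(b) > 0)  # True: the boxes overlap in a 10x50 strip
print(a.iou(b))               # intersection area divided by union area
print(a.includes(b))          # False: b extends past x=100
print(list(b))                # [x1, y1, width, height] via __iter__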
cv_analysis/utils/spacial.py  (new file, 286 lines)
@@ -0,0 +1,286 @@
# See https://stackoverflow.com/a/39757388
from __future__ import annotations

from functools import lru_cache
from operator import attrgetter
from typing import TYPE_CHECKING, Iterable

from funcy import juxt, rpartial, compose, lflatten, first, second

from cv_analysis.utils import lift

if TYPE_CHECKING:
    from cv_analysis.utils.rectangle import Rectangle


def adjacent(alpha: Rectangle, beta: Rectangle, tolerance=7, strict=False):
    """Checks if the two rectangles are adjacent to each other.

    Args:
        alpha: The first rectangle.
        beta: The second rectangle.
        tolerance: The maximum distance between the two rectangles.
        strict: If True, the rectangles must be adjacent along one axis and contained within the other axis.
            Else, the rectangles must be adjacent along one axis and overlapping the other axis.

    Returns:
        True if the two rectangles are adjacent to each other, False otherwise.
    """
    select_strictness_variant = first if strict else second
    test_candidates = [
        # b touches a's right edge
        (right_left_aligned_and_vertically_contained, right_left_aligned_and_vertically_overlapping),
        # b touches a's left edge
        (left_right_aligned_and_vertically_contained, left_right_aligned_and_vertically_overlapping),
        # b sits below a
        (bottom_top_aligned_and_horizontally_contained, bottom_top_aligned_and_horizontally_overlapping),
        # b sits above a
        (top_bottom_aligned_and_horizontally_contained, top_bottom_aligned_and_horizontally_overlapping),
    ]

    tests = map(select_strictness_variant, test_candidates)
    return any(juxt(*tests)(alpha, beta, tolerance))


def right_left_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is left of the other within a tolerance and also overlaps the other's y range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def left_right_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is right of the other within a tolerance and also overlaps the other's y range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def bottom_top_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is above the other within a tolerance and also overlaps the other's x range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def top_bottom_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is below the other within a tolerance and also overlaps the other's x range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def right_left_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is left of the other within a tolerance and also contains the other's y range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def left_right_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is right of the other within a tolerance and also contains the other's y range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def bottom_top_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is above the other within a tolerance and also contains the other's x range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def top_bottom_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is below the other within a tolerance and also contains the other's x range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
    axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
    axis_1_lower_bound, axis_1_upper_bound, tolerance,
):
    """Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
    axis.
    """
    return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
        axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
        axis_1_lower_bound, axis_1_upper_bound, tolerance, mode="overlapping",
    )


def adjacent_along_one_axis_and_contained_within_perpendicular_axis(
    axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
    axis_1_lower_bound, axis_1_upper_bound, tolerance,
):
    """Checks if two points are adjacent along one axis and two other points fall entirely within a range along the
    perpendicular axis.
    """
    return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
        axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
        axis_1_lower_bound, axis_1_upper_bound, tolerance, mode="contained",
    )


def adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
    axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
    axis_1_lower_bound, axis_1_upper_bound, tolerance, mode,
):
    """Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
    axis or are contained in that range, depending on the mode specified.
    """
    assert mode in ["overlapping", "contained"]
    quantifier = any if mode == "overlapping" else all
    return all(
        [
            abs(axis_0_point_1 - axis_1_point_2) <= tolerance,
            quantifier(
                [
                    axis_1_lower_bound <= p <= axis_1_upper_bound
                    for p in [axis_1_contained_point_1, axis_1_contained_point_2]
                ]
            ),
        ]
    )


def contains(alpha: Rectangle, beta: Rectangle, tol=3):
    """Checks if the first rectangle contains the second rectangle."""
    return (
        beta.x1 + tol >= alpha.x1
        and beta.y1 + tol >= alpha.y1
        and beta.x2 - tol <= alpha.x2
        and beta.y2 - tol <= alpha.y2
    )


def is_contained(rectangle: Rectangle, rectangles: Iterable[Rectangle]):
    """Checks if the rectangle is contained within any of the other rectangles."""
    other_rectangles = filter(lambda r: r != rectangle, rectangles)
    return any(map(rpartial(contains, rectangle), other_rectangles))


def iou(alpha: Rectangle, beta: Rectangle):
    """Calculates the intersection area over the union area of two rectangles."""
    return intersection(alpha, beta) / union(alpha, beta)


def area(rectangle: Rectangle):
    """Calculates the area of a rectangle."""
    return abs((rectangle.x2 - rectangle.x1) * (rectangle.y2 - rectangle.y1))


def union(alpha: Rectangle, beta: Rectangle):
    """Calculates the union area of two rectangles."""
    return area(alpha) + area(beta) - intersection(alpha, beta)


@lru_cache(maxsize=1000)
def intersection(alpha, beta):
    """Calculates the intersection of two rectangles."""
    return intersection_along_x_axis(alpha, beta) * intersection_along_y_axis(alpha, beta)


def intersection_along_x_axis(alpha, beta):
    """Calculates the intersection along the x-axis."""
    return intersection_along_axis(alpha, beta, "x")


def intersection_along_y_axis(alpha, beta):
    """Calculates the intersection along the y-axis."""
    return intersection_along_axis(alpha, beta, "y")


def intersection_along_axis(alpha, beta, axis):
    """Calculates the intersection along the given axis.

    Cases:
          a        b
        [-----]  (---)     ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = 0
          b        a
        (---)  [-----]     ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = 0
          a     b
        [--(----]----)     ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = (a2 - b1)
           a    b
        (-[---]----)       ==> [b1, a1, a2, b2] ==> max(0, (a2 - a1)) = (a2 - a1)
           b    a
        [-(---)----]       ==> [a1, b1, b2, a2] ==> max(0, (b2 - b1)) = (b2 - b1)
             b     a
        (----[--)----]     ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = (b2 - a1)
    """
    assert axis in ["x", "y"]

    def get_component_accessor(component):
        """Returns a function that accesses the given component of a rectangle."""
        return attrgetter(f"{axis}{component}")

    def make_access_components_and_sort_fn(component):
        """Returns a function that accesses and sorts the given component of multiple rectangles."""
        assert component in [1, 2]
        return compose(sorted, lift(get_component_accessor(component)))

    sort_first_components, sort_second_components = map(make_access_components_and_sort_fn, [1, 2])

    min_c1, max_c1, min_c2, max_c2 = lflatten(juxt(sort_first_components, sort_second_components)((alpha, beta)))
    intersection = max(0, min_c2 - max_c1)
    return intersection


def related(alpha: Rectangle, beta: Rectangle):
    return close(alpha, beta) or overlap(alpha, beta)


def close(alpha: Rectangle, beta: Rectangle, max_gap=14):
    # FIXME: Parameterize via factory
    return adjacent(alpha, beta, tolerance=max_gap, strict=True)


def overlap(alpha: Rectangle, beta: Rectangle):
    return intersection(alpha, beta) > 0
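A brief, hedged example of how the strict and non-strict adjacency variants differ; the coordinates are invented, and the stated results follow from the definitions above.

from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import adjacent, intersection

a = Rectangle(0, 0, 100, 100)
b = Rectangle(103, 20, 160, 80)    # 3px right of a, y-range fully inside a's
c = Rectangle(103, 50, 160, 140)   # 3px right of a, y-range only partially overlapping a's

print(adjacent(a, b, strict=True))   # True: edges within tolerance and b's y-range contained in a's
print(adjacent(a, c, strict=True))   # False: c's y-range is not fully contained
print(adjacent(a, c, strict=False))  # True: overlapping the y-range is enough
print(intersection(a, b))            # 0: adjacency does not require overlap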
@@ -1,131 +0,0 @@ (file deleted; its previous contents were:)
from json import dumps
from typing import Iterable

import numpy as np
from funcy import identity


class Rectangle:
    def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh", discrete=True):
        make_discrete = int if discrete else identity

        try:
            self.x1 = make_discrete(x1)
            self.y1 = make_discrete(y1)
            self.w = make_discrete(w) if w else make_discrete(x2 - x1)
            self.h = make_discrete(h) if h else make_discrete(y2 - y1)
            self.x2 = make_discrete(x2) if x2 else self.x1 + self.w
            self.y2 = make_discrete(y2) if y2 else self.y1 + self.h
            assert np.isclose(self.x1 + self.w, self.x2)
            assert np.isclose(self.y1 + self.h, self.y2)
            self.indent = indent
            self.format = format
        except Exception as err:
            raise Exception("x1, y1, (w|x2), and (h|y2) must be defined.") from err

    def json_xywh(self):
        return {"x": self.x1, "y": self.y1, "width": self.w, "height": self.h}

    def json_xyxy(self):
        return {"x1": self.x1, "y1": self.y1, "x2": self.x2, "y2": self.y2}

    def json_full(self):
        # TODO: can we make all coords x0, y0 based? :)
        return {
            "x0": self.x1,
            "y0": self.y1,
            "x1": self.x2,
            "y1": self.y2,
            "width": self.w,
            "height": self.h,
        }

    def json(self):
        json_func = {"xywh": self.json_xywh, "xyxy": self.json_xyxy}.get(self.format, self.json_full)
        return json_func()

    def xyxy(self):
        return self.x1, self.y1, self.x2, self.y2

    def xywh(self):
        return self.x1, self.y1, self.w, self.h

    def intersection(self, rect):
        bx1, by1, bx2, by2 = rect.xyxy()
        if (self.x1 > bx2) or (bx1 > self.x2) or (self.y1 > by2) or (by1 > self.y2):
            return 0
        intersection_ = (min(self.x2, bx2) - max(self.x1, bx1)) * (min(self.y2, by2) - max(self.y1, by1))
        return intersection_

    def area(self):
        return (self.x2 - self.x1) * (self.y2 - self.y1)

    def iou(self, rect):
        intersection = self.intersection(rect)
        if intersection == 0:
            return 0
        union = self.area() + rect.area() - intersection
        return intersection / union

    def includes(self, other: "Rectangle", tol=3):
        """does a include b?"""
        return (
            other.x1 + tol >= self.x1
            and other.y1 + tol >= self.y1
            and other.x2 - tol <= self.x2
            and other.y2 - tol <= self.y2
        )

    def is_included(self, rectangles: Iterable["Rectangle"]):
        return any(rect.includes(self) for rect in rectangles if not rect == self)

    def adjacent(self, rect2: "Rectangle", tolerance=7):
        # tolerance=1 was set too low; most lines are 2px wide
        def adjacent2d(sixtuple):
            g, h, i, j, k, l = sixtuple
            return (abs(g - h) <= tolerance) and any(k <= p <= l for p in [i, j])

        if rect2 is None:
            return False
        return any(
            map(
                adjacent2d,
                [
                    (self.x2, rect2.x1, rect2.y1, rect2.y2, self.y1, self.y2),
                    (self.x1, rect2.x2, rect2.y1, rect2.y2, self.y1, self.y2),
                    (self.y2, rect2.y1, rect2.x1, rect2.x2, self.x1, self.x2),
                    (self.y1, rect2.y2, rect2.x1, rect2.x2, self.x1, self.x2),
                ],
            )
        )

    @classmethod
    def from_xyxy(cls, xyxy_tuple, discrete=True):
        x1, y1, x2, y2 = xyxy_tuple
        return cls(x1=x1, y1=y1, x2=x2, y2=y2, discrete=discrete)

    @classmethod
    def from_xywh(cls, xywh_tuple, discrete=True):
        x, y, w, h = xywh_tuple
        return cls(x1=x, y1=y, w=w, h=h, discrete=discrete)

    @classmethod
    def from_dict_xywh(cls, xywh_dict, discrete=True):
        return cls(x1=xywh_dict["x"], y1=xywh_dict["y"], w=xywh_dict["width"], h=xywh_dict["height"], discrete=discrete)

    def __str__(self):
        return dumps(self.json(), indent=self.indent)

    def __repr__(self):
        return str(self.json())

    def __iter__(self):
        return list(self.json().values()).__iter__()

    def __eq__(self, rect):
        return all([self.x1 == rect.x1, self.y1 == rect.y1, self.w == rect.w, self.h == rect.h])


class Contour:
    def __init__(self):
        pass
@@ -1,61 +0,0 @@ (file deleted; its previous contents were:)
from typing import Iterable

import numpy as np

from cv_analysis.utils.structures import Rectangle


def find_max_overlap(box: Rectangle, box_list: Iterable[Rectangle]):
    best_candidate = max(box_list, key=lambda x: box.iou(x))
    iou = box.iou(best_candidate)
    return best_candidate, iou


def compute_page_iou(results_boxes: Iterable[Rectangle], ground_truth_boxes: Iterable[Rectangle]):
    results = list(results_boxes)
    truth = list(ground_truth_boxes)
    if (not results) or (not truth):
        return 0
    iou_sum = 0
    denominator = max(len(results), len(truth))
    while results and truth:
        gt_box = truth.pop()
        best_match, best_iou = find_max_overlap(gt_box, results)
        results.remove(best_match)
        iou_sum += best_iou
    score = iou_sum / denominator
    return score


def compute_document_score(results_dict, annotation_dict):

    page_weights = np.array([len(page["cells"]) for page in annotation_dict["pages"]])
    page_weights = page_weights / sum(page_weights)

    scores = []
    for i in range(len(annotation_dict["pages"])):
        scores.append(
            compute_page_iou(
                map(Rectangle.from_dict_xywh, results_dict["pages"][i]["cells"]),
                map(Rectangle.from_dict_xywh, annotation_dict["pages"][i]["cells"]),
            )
        )

    doc_score = np.average(np.array(scores), weights=page_weights)

    return doc_score


"""
from cv_analysis.utils.test_metrics import *

r1 = Rectangle.from_dict_xywh({'x': 30, 'y': 40, 'width': 50, 'height': 60})
r2 = Rectangle.from_dict_xywh({'x': 40, 'y': 30, 'width': 55, 'height': 65})
r3 = Rectangle.from_dict_xywh({'x': 45, 'y': 35, 'width': 45, 'height': 55})
r4 = Rectangle.from_dict_xywh({'x': 25, 'y': 45, 'width': 45, 'height': 55})
d1 = {"pages": [{"cells": [r1.json_xywh(), r2.json_xywh()]}]}
d2 = {"pages": [{"cells": [r3.json_xywh(), r4.json_xywh()]}]}

compute_iou_from_boxes(r1, r2)
find_max_overlap(r1, [r2, r3, r4])
compute_page_iou([r1, r2], [r3, r4])
compute_document_score(d1, d2)
"""
@@ -1,9 +1,19 @@
-from numpy import generic
+from __future__ import annotations
+
+import itertools
+
 import cv2
+import numpy as np
+from PIL import Image
+from funcy import first, iterate, keep, lmap, repeatedly
+from numpy import generic


 def copy_and_normalize_channels(image):
+    if isinstance(image, Image.Image):
+        image = np.array(image)
+
     image = image.copy()
     try:
         image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
@@ -17,3 +27,64 @@ def npconvert(ob):
     if isinstance(ob, generic):
         return ob.item()
     raise TypeError
+
+
+def lift(fn):
+    def lifted(coll):
+        yield from map(fn, coll)
+
+    return lifted
+
+
+def star(fn):
+    def starred(args):
+        return fn(*args)
+
+    return starred
+
+
+def lstarkeep(fn, coll):
+    return list(starkeep(fn, coll))
+
+
+def starkeep(fn, coll):
+    yield from keep(star(fn), coll)
+
+
+def until(cond, func, *args, **kwargs):
+    return first(filter(cond, iterate(func, *args, **kwargs)))
+
+
+def conj(x, xs):
+    return [x, *xs]
+
+
+def rconj(xs, x):
+    return [*xs, x]
+
+
+def make_merger_sentinel():
+    def no_new_mergers(records):
+        nonlocal number_of_records_so_far
+
+        number_of_records_now = len(records)
+
+        if number_of_records_now == number_of_records_so_far:
+            return True
+        else:
+            number_of_records_so_far = number_of_records_now
+            return False
+
+    number_of_records_so_far = -1
+
+    return no_new_mergers
+
+
+def zipmap(fn, boxes, n=2):
+    rets = lmap(list, zip(*map(fn, boxes)))
+    yield from repeatedly(lambda: [], n) if len(rets) < n else rets
+
+
+def every_nth(n, iterable):
+    return itertools.islice(iterable, 0, None, n)
+
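A hedged sketch of how the new functional helpers compose; merge_once is an invented stand-in for a single merging pass, used only to show the until/make_merger_sentinel loop reaching a fixed point.

from cv_analysis.utils import until, make_merger_sentinel, every_nth, lift

def merge_once(records):
    # placeholder: a real pass would merge related rectangles and return the new list
    return records[: max(1, len(records) - 1)]

records = list(range(10))
stable = until(make_merger_sentinel(), merge_once, records)  # repeat merge_once until the length stops changing
print(len(stable))                                           # 1

print(list(every_nth(3, "abcdefghij")))   # ['a', 'd', 'g', 'j']
print(list(lift(str.upper)("abc")))       # ['A', 'B', 'C']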
poetry.lock  (generated, 1169 lines changed)
File diff suppressed because it is too large.
@@ -36,6 +36,19 @@ loguru = "^0.6.0"
 pytest = "^7.0.1"


+[tool.poetry.group.test.dependencies]
+albumentations = "^1.3.0"
+faker = "^16.4.0"
+pandas = "^1.5.2"
+pytablewriter = "^0.64.2"
+dataframe-image = "^0.1.5"
+blend-modes = "^2.1.0"
+
+
+[tool.poetry.group.dev.dependencies]
+ipython = "^8.9.0"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
@@ -1,50 +1,75 @@
-"""
-Usage:
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type table --show
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type redaction --show
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type layout --show
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type figure --show
-"""
-
 import argparse

+import loguru
+
+from cv_analysis.figure_detection.figure_detection import detect_figures
+from cv_analysis.layout_parsing import parse_layout
+from cv_analysis.redaction_detection import find_redactions
+from cv_analysis.table_parsing import parse_tables
 from cv_analysis.utils.display import show_image
-from cv_analysis.utils.draw import draw_contours, draw_rectangles
-from cv_analysis.utils.open_pdf import open_pdf
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.drawing import draw_contours, draw_rectangles
+from cv_analysis.utils.input import open_analysis_input_file


 def parse_args():
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description="Annotate PDF pages with detected elements. Specified pages form a closed interval and are 1-based."
+    )
     parser.add_argument("pdf_path")
-    parser.add_argument("--page_index", type=int, default=0)
-    parser.add_argument("--type", choices=["table", "redaction", "layout", "figure"], default="table")
-    parser.add_argument("--show", action="store_true", default=False)
+    parser.add_argument(
+        "--first_page",
+        "-f",
+        type=int,
+        default=1,
+    )
+    parser.add_argument(
+        "-last_page",
+        "-l",
+        help="if not specified, defaults to the value of the first page specified",
+        type=int,
+        default=None,
+    )
+    parser.add_argument(
+        "--type",
+        "-t",
+        help="element type to look for and analyze",
+        choices=["table", "redaction", "layout", "figure"],
+        default="table",
+    )
+    parser.add_argument("--page", "-p", type=int, default=1)
     args = parser.parse_args()
     return args


-def annotate_page(page_image, analysis_function, drawing_function, name="tmp.png", show=True):
-    result = analysis_function(page_image)
-    page_image = drawing_function(page_image, result)
-    vizlogger.debug(page_image, name)
+def annotate_page(page_image, analysis_fn, draw_fn):
+    result = analysis_fn(page_image)
+    page_image = draw_fn(page_image, result)
     show_image(page_image)


-if __name__ == "__main__":
-    args = parse_args()
-    page = open_pdf(args.pdf_path, first_page=args.page_index, last_page=args.page_index)[0]
-    name = f"{args.type}_final_result.png"
-    draw = draw_rectangles
-    if args.type == "table":
-        from cv_analysis.table_parsing import parse_tables as analyze
-    elif args.type == "redaction":
-        from cv_analysis.redaction_detection import find_redactions as analyze
-        draw = draw_contours
-    elif args.type == "layout":
-        from cv_analysis.layout_parsing import parse_layout as analyze
-    elif args.type == "figure":
-        from cv_analysis.figure_detection.figure_detection import detect_figures
-        analyze = detect_figures
-    annotate_page(page, analyze, draw, name=name, show=args.show)
+def get_analysis_and_draw_fn_for_type(element_type):
+    analysis_fn, draw_fn = {
+        "table": (parse_tables, draw_rectangles),
+        "redaction": (find_redactions, draw_contours),
+        "layout": (parse_layout, draw_rectangles),
+        "figure": (detect_figures, draw_rectangles),
+    }[element_type]
+
+    return analysis_fn, draw_fn
+
+
+def main(args):
+    loguru.logger.info(f"Annotating {args.type}s in {args.pdf_path}...")
+    pages = open_analysis_input_file(args.pdf_path, first_page=args.first_page, last_page=args.last_page)
+
+    for page in pages:
+        analysis_fn, draw_fn = get_analysis_and_draw_fn_for_type(args.type)
+        annotate_page(page, analysis_fn, draw_fn)
+
+
+if __name__ == "__main__":
+    try:
+        main(parse_args())
+    except KeyboardInterrupt:
+        pass
@@ -10,7 +10,7 @@ from funcy import lmap
 from cv_analysis.figure_detection.figure_detection import detect_figures
 from cv_analysis.layout_parsing import parse_layout
 from cv_analysis.table_parsing import parse_tables
-from cv_analysis.utils.draw import draw_rectangles
+from cv_analysis.utils.drawing import draw_rectangles
 from pdf2img.conversion import convert_pages_to_images

@@ -2,28 +2,27 @@ import argparse
 import json
 from pathlib import Path

-from cv_analysis.server.pipeline import get_analysis_pipeline
+from loguru import logger
+
+from cv_analysis.server.pipeline import make_analysis_pipeline_for_element_type


 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("pdf")
-    parser.add_argument("--type", "-t", choices=["table", "layout", "figure"], required=True)
+    parser.add_argument("pdf", type=Path)
+    parser.add_argument("--element_type", "-t", choices=["table", "figure"], required=True)
     return parser.parse_args()


+def main(args):
+
+    analysis_fn = make_analysis_pipeline_for_element_type(args.element_type)
+
+    logger.info(f"Analysing document for {args.element_type}s...")
+    results = list(analysis_fn(args.pdf.read_bytes()))
+
+    print(json.dumps(results, indent=2))
+
+
 if __name__ == "__main__":
-    args = parse_args()
-
-    analysis_fn = get_analysis_pipeline(args.type)
-
-    with open(args.pdf, "rb") as f:
-        pdf_bytes = f.read()
-
-    results = list(analysis_fn(pdf_bytes))
-
-    folder = Path(args.pdf).parent
-    file_stem = Path(args.pdf).stem
-
-    with open(f"{folder}/{file_stem}_{args.type}.json", "w+") as f:
-        json.dump(results, f, indent=2)
+    main(parse_args())
@@ -4,7 +4,7 @@ import logging
 from operator import itemgetter

 from cv_analysis.config import get_config
-from cv_analysis.server.pipeline import get_analysis_pipeline
+from cv_analysis.server.pipeline import make_analysis_pipeline_for_segment_type
 from cv_analysis.utils.banner import make_art
 from pyinfra import config as pyinfra_config
 from pyinfra.queue.queue_manager import QueueManager
@@ -31,7 +31,10 @@ def analysis_callback(queue_message: dict):
     should_publish_result = True

     object_bytes = gzip.decompress(storage.get_object(bucket, object_name))
-    analysis_fn = get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images)
+    analysis_fn = make_analysis_pipeline_for_segment_type(
+        operation,
+        skip_pages_without_images=CV_CONFIG.table_parsing_skip_pages_without_images,
+    )

     results = analysis_fn(object_bytes)
     response = {**queue_message, "data": list(results)}
synthesis/__init__.py  (new file, 17 lines)
@@ -0,0 +1,17 @@
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument()
    args = parser.parse_args()

    return args


def main(args):
    pass


if __name__ == "__main__":
    main(parse_args())
synthesis/content_generator.py  (new file, 47 lines)
@@ -0,0 +1,47 @@
import itertools
from typing import List

from PIL import Image
from funcy import lsplit, lfilter

from cv_analysis.utils import every_nth, zipmap
from cv_analysis.utils.geometric import is_square_like
from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.postprocessing import remove_included, remove_overlapping
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd
from synthesis.segment.segments import (
    generate_random_text_block,
    generate_recursive_random_table_with_caption,
    generate_random_plot_with_caption,
)


class ContentGenerator:
    def __init__(self):
        self.constrain_layouts = True

    def __call__(self, boxes: List[Rectangle]) -> Image:
        rnd.shuffle(boxes)

        figure_boxes, text_boxes = lsplit(is_square_like, boxes)

        if self.constrain_layouts:
            figure_boxes = merge_related_rectangles(figure_boxes)
            figure_boxes = lfilter(is_square_like, figure_boxes)
            text_boxes = merge_related_rectangles(text_boxes)

        boxes = list(
            itertools.chain(
                map(generate_random_text_block, every_nth(2, text_boxes)),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
                *zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
            )
        )

        if self.constrain_layouts:
            boxes = remove_included(boxes)
            boxes = remove_overlapping(boxes)

        return boxes
synthesis/partitioner/__init__.py  (new file, empty)

synthesis/partitioner/page_partitioner.py  (new file, 71 lines)
@@ -0,0 +1,71 @@
import abc
from typing import List, Tuple

from PIL import Image
from funcy import lflatten

from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd


class PagePartitioner(abc.ABC):
    # TODO: produce boxes for page numbers, headers and footers
    def __init__(self):
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1

        self.recursive_margin_percentage = 0.007
        self.max_recursion_depth = 3
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        left_margin = int(page.width * self.left_margin_percentage)
        right_margin = int(page.width * self.right_margin_percentage)
        top_margin = int(page.height * self.top_margin_percentage)
        bottom_margin = int(page.height * self.bottom_margin_percentage)

        box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin)
        boxes = lflatten(self.generate_content_boxes(box))
        return boxes

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        assert axis in ["x", "y"]

        edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height)
        split_coordinate = split_percentage * edge_length + edge_anchor_point
        child_boxes = get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)
        return child_boxes

    def recurse(self, depth):
        return rnd.random() <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth


def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    assert axis in ["x", "y"]

    def low(point_1d):
        return point_1d * (1 + margin_percentage)

    def high(point_1d):
        return point_1d * (1 - margin_percentage)

    if axis == "x":
        return (
            Rectangle(low(box.x1), low(box.y1), high(split_coordinate), high(box.y2)),
            Rectangle(low(split_coordinate), low(box.y1), high(box.x2), high(box.y2)),
        )
    else:
        return (
            Rectangle(low(box.x1), low(box.y1), high(box.x2), high(split_coordinate)),
            Rectangle(low(box.x1), low(split_coordinate), high(box.x2), high(box.y2)),
        )
synthesis/partitioner/random.py  (new file, 22 lines)
@@ -0,0 +1,22 @@
from cv_analysis.utils.rectangle import Rectangle
from synthesis.partitioner.page_partitioner import PagePartitioner
from synthesis.random import rnd


class RandomPagePartitioner(PagePartitioner):
    def __init__(self):
        super().__init__()

    def generate_content_boxes(self, box: Rectangle, depth=0):
        if depth >= self.max_recursion_depth:
            yield box
        else:
            child_boxes = self.generate_child_boxes(
                box,
                axis=rnd.choice(["x", "y"]),
                split_percentage=rnd.uniform(0.3, 0.7),
            )
            if self.recurse(depth):
                yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
            else:
                yield child_boxes
synthesis/partitioner/two_column.py  (new file, 25 lines)
@@ -0,0 +1,25 @@
from cv_analysis.utils.rectangle import Rectangle
from synthesis.partitioner.page_partitioner import PagePartitioner
from synthesis.random import rnd


class TwoColumnPagePartitioner(PagePartitioner):
    def __init__(self):
        super().__init__()
        self.max_recursion_depth = 3

    def generate_content_boxes(self, box: Rectangle, depth=0):
        if depth >= self.max_recursion_depth:
            yield box

        else:
            if depth == 0:
                axis = "x"
                split_percentage = 0.5
            else:
                axis = "y"
                split_percentage = rnd.choice([0.3, 0.7])

            child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)

            yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
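A hedged sketch of how the partitioners and the ContentGenerator appear intended to compose: partition a blank page into layout boxes, then fill them with random segments. The blank page and page size are invented, and rendering the returned content rectangles back onto the page is not shown here.

# Sketch only; requires the synthesis package (fonts, matplotlib, text generators) to be importable.
from PIL import Image

from synthesis.content_generator import ContentGenerator
from synthesis.partitioner.two_column import TwoColumnPagePartitioner

page = Image.new("RGB", (2480, 3508), "white")   # roughly an A4 page at 300 dpi

boxes = TwoColumnPagePartitioner()(page)         # layout rectangles honouring the configured margins
segments = ContentGenerator()(boxes)             # ContentRectangles carrying rendered text, tables, and plots

print(len(boxes), len(segments))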
synthesis/random.py  (new file, 34 lines)
@@ -0,0 +1,34 @@
import random
from functools import lru_cache

from loguru import logger

random_seed = random.randint(0, 2**32 - 1)
# random_seed = 2973413116
# random_seed = 2212357755

# random_seed = 2987558464  # light green

# random_seed = 1173898033  # strange bar plot

# 2467967671

logger.info(f"Random seed: {random_seed}")
rnd = random.Random(random_seed)


def maybe():
    return rnd.random() > 0.9


def possibly():
    return rnd.random() > 0.5


def probably():
    return rnd.random() > 0.4


@lru_cache(maxsize=None)
def get_random_seed():
    return rnd.randint(0, 2**32 - 1)
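A small illustration of the seeding behaviour: because get_random_seed is cached with maxsize=None, every caller receives the same derived seed within one run, which keeps the segment generators reproducible relative to the logged top-level seed. The printed values are only expectations, not guarantees.

from synthesis.random import get_random_seed, probably

print(get_random_seed() == get_random_seed())  # True: the derived seed is cached for the whole run
print(probably())                              # True roughly 60% of the time under the logged seed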
synthesis/segment/__init__.py  (new file, 17 lines)
@@ -0,0 +1,17 @@
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument()
    args = parser.parse_args()

    return args


def main(args):
    pass


if __name__ == "__main__":
    main(parse_args())
synthesis/segment/content_rectangle.py  (new file, 10 lines)
@@ -0,0 +1,10 @@
from cv_analysis.utils.rectangle import Rectangle


class ContentRectangle(Rectangle):
    def __init__(self, x1, y1, x2, y2, content=None):
        super().__init__(x1, y1, x2, y2)
        self.content = content

    def __repr__(self):
        return f"{self.__class__.__name__}({self.x1}, {self.y1}, {self.x2}, {self.y2}, content={self.content})"
synthesis/segment/plot.py  (new file, 192 lines)
@@ -0,0 +1,192 @@
import io
import random
from functools import lru_cache, partial

import loguru
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap

from cv_analysis.utils.geometric import is_square_like, is_wide, is_tall
from cv_analysis.utils.image_operations import superimpose
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd, probably, maybe
from synthesis.segment.random_content_rectangle import RandomContentRectangle
from synthesis.text.text import generate_random_words


class RandomPlot(RandomContentRectangle):
    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)

        self.cmap = pick_colormap()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_plot(self, rectangle: Rectangle):

        if is_square_like(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_bar_plot,
                    self.generate_random_scatter_plot,
                    self.generate_random_histogram,
                    self.generate_random_pie_chart,
                ]
            )
        elif is_wide(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_histogram,
                    self.generate_random_bar_plot,
                ]
            )
        elif is_tall(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_bar_plot,
                    self.generate_random_histogram,
                ]
            )
        else:
            plt_fn = self.generate_random_scatter_plot

        plt_fn(rectangle)

    def generate_random_bar_plot(self, rectangle: Rectangle):
        x = sorted(np.random.randint(low=1, high=11, size=5))
        y = np.random.randint(low=1, high=11, size=5)
        bar_fn = partial(
            plt.bar,
            log=random.choice([True, False]),
        )
        self.__generate_random_plot(bar_fn, rectangle, x, y)

    def generate_random_line_plot(self, rectangle: Rectangle):
        f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])

        x = np.linspace(0, 10, 100)
        y = f(x)

        plot_fn = partial(
            plt.plot,
        )

        self.__generate_random_plot(plot_fn, rectangle, x, y)

    def generate_random_scatter_plot(self, rectangle: Rectangle):
        n = rnd.randint(10, 40)
        x = np.random.normal(size=n)
        y = np.random.normal(size=n)
        scatter_fn = partial(
            plt.scatter,
            cmap=self.cmap,
            marker=rnd.choice(["o", "*", "+", "x"]),
        )

        self.__generate_random_plot(scatter_fn, rectangle, x, y)

    def generate_random_histogram(self, rectangle: Rectangle):
        x = np.random.normal(size=100)
        hist_fn = partial(
            plt.hist,
            orientation=random.choice(["horizontal", "vertical"]),
            histtype=random.choice(["bar", "barstacked", "step", "stepfilled"]),
            log=random.choice([True, False]),
            stacked=random.choice([True, False]),
            density=random.choice([True, False]),
            cumulative=random.choice([True, False]),
        )
        self.__generate_random_plot(hist_fn, rectangle, x, random.randint(5, 20))

    def generate_random_pie_chart(self, rectangle: Rectangle):

        n = random.randint(3, 7)
        x = np.random.uniform(size=n)
        pie_fn = partial(
            plt.pie,
            shadow=True,
            startangle=90,
            pctdistance=0.85,
            labeldistance=1.1,
            colors=self.cmap(np.linspace(0, 1, 10)),
        )
        self.__generate_random_plot(
            pie_fn,
            rectangle,
            x,
            np.random.uniform(0, 0.1, size=n),
            plot_kwargs=self.generate_plot_kwargs(keywords=["a"]),
        )

    def generate_plot_kwargs(self, keywords=None):

        kwargs = {
            "color": rnd.choice(self.cmap.colors),
            "linestyle": rnd.choice(["-", "--", "-.", ":"]),
            "linewidth": rnd.uniform(1, 4),
        }

        return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords}

    def __generate_random_plot(self, plot_fn, rectangle: Rectangle, x, y, plot_kwargs=None):

        plot_kwargs = self.generate_plot_kwargs() if plot_kwargs is None else plot_kwargs

        fig, ax = plt.subplots()
        fig.set_size_inches(rectangle.width / 100, rectangle.height / 100)
        fig.tight_layout(pad=0)

        plot_fn(x, y, **plot_kwargs)
        ax.set_facecolor("none")

        probably() and ax.set_title(generate_random_words(1, 3))

        # disable axes at random
        maybe() and ax.set_xticks([])
        maybe() and ax.set_yticks([])
        maybe() and ax.set_xticklabels([])
        maybe() and ax.set_yticklabels([])
        maybe() and ax.set_xlabel("")
        maybe() and ax.set_ylabel("")
        maybe() and ax.set_title("")
        maybe() and ax.set_frame_on(False)

        # remove spines at random
        maybe() and (ax.spines["top"].set_visible(False) or ax.spines["right"].set_visible(False))

        image = dump_plt_to_image(rectangle)
        assert image.mode == "RGBA"

        self.content = image if not self.content else superimpose(self.content, image)


@lru_cache(maxsize=None)
def pick_colormap() -> ListedColormap:
    cmap_name = rnd.choice(
        [
            "viridis",
            "plasma",
            "inferno",
            "magma",
            "cividis",
        ],
    )
    loguru.logger.info(f"Using colormap {cmap_name}")
    cmap = plt.get_cmap(cmap_name)
    return cmap


def dump_plt_to_image(rectangle):
    buf = io.BytesIO()
    plt.savefig(buf, format="png", transparent=True)
    buf.seek(0)
    image = Image.open(buf)
    image = image.resize((rectangle.width, rectangle.height))
    buf.close()
    plt.close()
    return image
synthesis/segment/random_content_rectangle.py  (new file, 11 lines)
@@ -0,0 +1,11 @@
import random

from synthesis.random import get_random_seed
from synthesis.segment.content_rectangle import ContentRectangle


class RandomContentRectangle(ContentRectangle):
    def __init__(self, x1, y1, x2, y2, content=None, seed=None):
        super().__init__(x1, y1, x2, y2, content)
        self.seed = seed or get_random_seed()
        self.random = random.Random(self.seed)
102
synthesis/segment/segments.py
Normal file
@ -0,0 +1,102 @@
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import probably, rnd
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.plot import RandomPlot
from synthesis.segment.text_block import TextBlock
from synthesis.text.font import pick_random_font_available_on_system
from synthesis.text.text_block_generator.caption import CaptionGenerator


def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
    block = RandomPlot(*rectangle.coords)
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.generate_random_plot(rectangle)
    return block


def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRectangle:
    from synthesis.segment.table.table import RecursiveRandomTable

    block = RecursiveRandomTable(*rectangle.coords, **kwargs)
    if isinstance(rectangle, RecursiveRandomTable):
        block.content = rectangle.content if rectangle.content else None  # TODO: Refactor
    block.generate_random_table()
    return block


def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
    block = TextBlock(
        *rectangle.coords,
        font=pick_random_font_available_on_system(
            includes=("serif", "sans-serif", "bold"),
            excludes=("mono", "italic", "oblique", "cursive"),
        ),
        font_size=30,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.put_text(text, rectangle)
    return block


def generate_random_plot_with_caption(rectangle: Rectangle):
    # TODO: deduplicate with generate_random_table_with_caption
    plot_box, caption_box = split_into_figure_and_caption(rectangle)
    plot_box = generate_random_plot(plot_box)
    caption_box = generate_random_image_caption(caption_box)
    return plot_box, caption_box


def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    table_box, caption_box = split_into_figure_and_caption(rectangle)
    table_box = generate_recursive_random_table(table_box, double_rule=probably())
    caption_box = generate_random_table_caption(caption_box)
    return table_box, caption_box


def split_into_figure_and_caption(rectangle: Rectangle):
    gap_percentage = rnd.uniform(0, 0.03)
    split_point = rnd.uniform(0.5, 0.9)
    figure_box = Rectangle(
        rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
    )
    caption_box = Rectangle(
        rectangle.x1, rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2), rectangle.x2, rectangle.y2
    )
    return figure_box, caption_box


def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    block = TextBlock(
        *rectangle.coords,
        font=pick_random_font_available_on_system(
            includes=("serif", "sans-serif"),
            excludes=("bold", "mono", "italic", "oblique", "cursive"),
        ),
        font_size=30,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.generate_random_text(rectangle, n_sentences)
    return block


def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    return generate_random_caption(rectangle, f"Fig {rnd.randint(1, 20)}")


def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    return generate_random_caption(rectangle, f"Tabl {rnd.randint(1, 20)}")


def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    block = TextBlock(
        *rectangle.coords,
        text_generator=CaptionGenerator(caption_start=caption_start),
        font=pick_random_font_available_on_system(
            includes=("italic",),
            excludes=("bold", "mono"),
        ),
        font_size=100,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.generate_random_text(rectangle, n_sentences)
    return block
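A hedged usage sketch for the helpers above (not part of the changeset; the region size and output file names are made up, and it assumes `Rectangle(x1, y1, x2, y2)` with `.coords`, `.width`, `.height` and an RGBA `.content` exactly as used in this module):

# Illustrative only; coordinates and file names are invented for the example.
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.segments import generate_random_plot_with_caption

# Split a 600x800 region into a figure box and a caption box, then fill both.
region = Rectangle(0, 0, 600, 800)
plot_box, caption_box = generate_random_plot_with_caption(region)

# Each returned ContentRectangle carries its rendered content as an RGBA image.
plot_box.content.save("plot.png")
caption_box.content.save("caption.png")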
0
synthesis/segment/table/__init__.py
Normal file
81
synthesis/segment/table/cell.py
Normal file
@ -0,0 +1,81 @@
from PIL import Image, ImageDraw

from cv_analysis.utils.image_operations import superimpose
from synthesis.segment.content_rectangle import ContentRectangle


class Cell(ContentRectangle):
    def __init__(self, x1, y1, x2, y2, color=None):
        super().__init__(x1, y1, x2, y2)

        self.background_color = color or (255, 255, 255, 0)

        # to debug use random border color: tuple([random.randint(100, 200) for _ in range(3)] + [255])
        self.cell_border_color = (0, 0, 0, 255)

        self.border_width = 1
        self.inset = 1

        self.content = Image.new("RGBA", (self.width, self.height))
        self.fill()

    def draw_top_border(self, width=None):
        self.draw_line((0, 0, self.width - self.inset, 0), width=width)
        return self

    def draw_bottom_border(self, width=None):
        self.draw_line((0, self.height - self.inset, self.width - self.inset, self.height - self.inset), width=width)
        return self

    def draw_left_border(self, width=None):
        self.draw_line((0, 0, 0, self.height), width=width)
        return self

    def draw_right_border(self, width=None):
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, self.height), width=width)
        return self

    def draw_line(self, points, width=None):
        width = width or self.border_width
        draw = ImageDraw.Draw(self.content)
        draw.line(points, width=width, fill=self.cell_border_color)
        return self

    def draw(self, width=None):
        self.draw_top_border(width=width)
        self.draw_bottom_border(width=width)
        self.draw_left_border(width=width)
        self.draw_right_border(width=width)
        return self

    def draw_top_left_corner(self, width=None):
        self.draw_line((0, 0, 0, 0), width=width)
        self.draw_line((0, 0, 0, 0), width=width)
        return self

    def draw_top_right_corner(self, width=None):
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, 0), width=width)
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, 0), width=width)
        return self

    def draw_bottom_left_corner(self, width=None):
        self.draw_line((0, self.height - self.inset, 0, self.height - self.inset), width=width)
        self.draw_line((0, self.height - self.inset, 0, self.height - self.inset), width=width)
        return self

    def draw_bottom_right_corner(self, width=None):
        self.draw_line(
            (self.width - self.inset, self.height - self.inset, self.width - self.inset, self.height - self.inset),
            width=width,
        )
        self.draw_line(
            (self.width - self.inset, self.height - self.inset, self.width - self.inset, self.height - self.inset),
            width=width,
        )
        return self

    def fill(self, color=None):
        color = color or self.background_color
        image = Image.new("RGBA", (self.width, self.height), color=color)
        self.content = superimpose(image, self.content)
        return self
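A short sketch of drawing a Cell on its own (not part of the changeset; the coordinates and colors are illustrative). It relies only on what the class above defines: the cell fills itself on construction and the draw_* methods chain by returning self:

# Illustrative only; coordinates and the background color are made up.
from synthesis.segment.table.cell import Cell

cell = Cell(0, 0, 120, 40, color=(240, 240, 240, 255))
# Draw only the right and bottom rules, as the table code does for inner cells.
cell.draw_right_border().draw_bottom_border()
cell.content.save("cell.png")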
279
synthesis/segment/table/table.py
Normal file
@ -0,0 +1,279 @@
import random
from copy import deepcopy
from enum import Enum
from functools import lru_cache, partial
from math import sqrt
from typing import List, Iterable

from PIL import Image
from funcy import chunks, mapcat, repeatedly
from loguru import logger

from cv_analysis.utils.geometric import is_square_like
from cv_analysis.utils.image_operations import superimpose

from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import area
from synthesis.random import rnd, possibly
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.plot import pick_colormap
from synthesis.segment.random_content_rectangle import RandomContentRectangle
from synthesis.segment.segments import generate_random_plot, generate_recursive_random_table, generate_text_block
from synthesis.segment.table.cell import Cell
from synthesis.text.text import generate_random_words, generate_random_number


class RecursiveRandomTable(RandomContentRectangle):
    def __init__(self, x1, y1, x2, y2, border_width=1, layout: str = None, double_rule=False):
        """A table with a random number of rows and columns, and random content in each cell.

        Args:
            x1: x-coordinate of the top-left corner
            y1: y-coordinate of the top-left corner
            x2: x-coordinate of the bottom-right corner
            y2: y-coordinate of the bottom-right corner
            border_width: width of the table border
            layout: layout of the table, either "horizontal", "vertical", "closed", or "open"
            double_rule: whether to use double rules as the top and bottom rules
        """

        assert layout in [None, "horizontal", "vertical", "closed", "open"]

        super().__init__(x1, y1, x2, y2)

        self.double_rule = double_rule
        self.double_rule_width = (3 * border_width) if self.double_rule else 0

        self.n_columns = rnd.randint(1, max(self.width // 100, 1))
        self.n_rows = rnd.randint(1, max((self.height - 2 * self.double_rule_width) // rnd.randint(17, 100), 1))
        self.cell_size = (self.width / self.n_columns, (self.height - 2 * self.double_rule_width) / self.n_rows)

        self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))

        self.background_color = get_random_background_color()

        self.layout = layout or self.pick_random_layout()
        logger.debug(f"Layout: {self.layout}")

    def pick_random_layout(self):
        if self.n_columns == 1 and self.n_rows == 1:
            layout = "closed"
        elif self.n_columns == 1:
            layout = rnd.choice(["vertical", "closed"])
        elif self.n_rows == 1:
            layout = rnd.choice(["horizontal", "closed"])
        else:
            layout = rnd.choice(["closed", "horizontal", "vertical", "open"])

        return layout

    def generate_random_table(self):
        cells = self.generate_table()
        cells = list(self.fill_cells_with_content(cells))
        # FIXME: There is a bug here: Table rule is not drawn correctly, actually we want to do cells = ...
        list(self.draw_cell_borders(cells))

        self.content = paste_contents(self.content, cells)
        assert self.content.mode == "RGBA"

    def fill_cells_with_content(self, cells):
        yield from map(self.build_cell, cells)

    def build_cell(self, cell):
        if self.__is_a_small_cell(cell):
            cell = self.build_small_cell(cell)

        elif self.__is_a_medium_sized_cell(cell):
            cell = self.build_medium_sized_cell(cell)

        elif self.__is_a_large_cell(cell):
            cell = self.build_large_cell(cell)

        else:
            raise ValueError(f"Invalid cell size: {get_size(cell)}")

        assert cell.content.mode == "RGBA"

        return cell

    def __is_a_small_cell(self, cell):
        return get_size(cell) <= Size.SMALL.value

    def __is_a_medium_sized_cell(self, cell):
        return get_size(cell) <= Size.MEDIUM.value

    def __is_a_large_cell(self, cell):
        return get_size(cell) > Size.MEDIUM.value

    def build_small_cell(self, cell):
        content = (possibly() and generate_random_words(1, 3)) or (
            generate_random_number()
            + ((possibly() and " " + rnd.choice(["$", "£", "%", "EUR", "USD", "CAD", "ADA"])) or "")
        )

        return generate_text_block(cell, content)

    def build_medium_sized_cell(self, cell):
        choice = rnd.choice(["plot", "recurse"])

        if choice == "plot":
            return generate_random_plot(cell)

        elif choice == "recurse":
            return generate_recursive_random_table(
                cell,
                border_width=1,
                layout=random.choice(["open", "horizontal", "vertical"]),
                double_rule=False,
            )

        else:
            return generate_text_block(cell, f"{choice} {get_size(cell):.0f} {get_size_class(cell).name}")

    def build_large_cell(self, cell):
        choice = rnd.choice(["plot", "recurse"])

        logger.debug(f"Generating {choice} {get_size(cell):.0f} {get_size_class(cell).name}")

        if choice == "plot" and is_square_like(cell):
            return generate_random_plot(cell)

        else:
            logger.debug(f"recurse {get_size(cell):.0f} {get_size_class(cell).name}")
            return generate_recursive_random_table(
                cell,
                border_width=1,
                layout=random.choice(["open", "horizontal", "vertical"]),
                double_rule=False,
            )

    def draw_cell_borders(self, cells: List[ContentRectangle]):
        def draw_edges_based_on_position(cell: Cell, col_idx, row_index):
            # Draw the borders of the cell based on its position in the table
            if col_idx < self.n_columns - 1:
                cell.draw_right_border()

            if row_index < self.n_rows - 1:
                cell.draw_bottom_border()

        columns = chunks(self.n_rows, cells)
        for col_idx, column in enumerate(columns):
            for row_index, cell in enumerate(column):
                # TODO: Refactor
                c = Cell(*cell.coords, self.background_color)
                c.content = cell.content
                draw_edges_based_on_position(c, col_idx, row_index)
                yield cell

        if self.layout == "closed":
            # TODO: Refactor
            c = Cell(*self.coords, self.background_color)
            c.content = self.content
            c.draw()
            yield self

        # TODO: Refactor
        if self.double_rule:
            c1 = Cell(*self.coords)
            c1.draw_top_border(width=1)
            c1.draw_bottom_border(width=1)

            x1, y1, x2, y2 = self.coords
            c2 = Cell(x1, y1 + self.double_rule_width, x2, y2 - self.double_rule_width)
            c2.draw_top_border(width=1)
            c2.draw_bottom_border(width=1)

            c = superimpose(c1.content, c2.content)

            self.content = superimpose(c, self.content)

            yield self

    def generate_table(self) -> Iterable[ContentRectangle]:
        yield from mapcat(self.generate_column, range(self.n_columns))

    def generate_column(self, column_index) -> Iterable[ContentRectangle]:
        logger.trace(f"Generating column {column_index}.")
        generate_cell_for_row_index = partial(self.generate_cell, column_index)
        yield from map(generate_cell_for_row_index, range(self.n_rows))

    def generate_cell(self, column_index, row_index) -> ContentRectangle:
        w, h = self.cell_size
        x1, y1 = (column_index * w), (row_index * h) + self.double_rule_width
        x2, y2 = x1 + w, y1 + h
        logger.trace(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
        return Cell(x1, y1, x2, y2, self.background_color)

    def generate_column_names(self):
        column_names = repeatedly(self.generate_column_name, self.n_columns)
        return column_names

    def generate_column_name(self):
        column_name = generate_random_words(1, 3)
        return column_name


@lru_cache(maxsize=None)
def get_random_background_color():
    return tuple([*get_random_color_complementing_color_map(pick_colormap()), rnd.randint(100, 210)])


def get_random_color_complementing_color_map(colormap):
    def color_complement(r, g, b):
        """Reference: https://stackoverflow.com/a/40234924"""

        def hilo(a, b, c):
            if c < b:
                b, c = c, b
            if b < a:
                a, b = b, a
            if c < b:
                b, c = c, b
            return a + c

        k = hilo(r, g, b)
        return tuple(k - u for u in (r, g, b))

    color = colormap(0.2)[:3]
    color = [int(255 * v) for v in color]
    color = color_complement(*color)
    return color


def paste_contents(page, contents: Iterable[ContentRectangle]):
    page = deepcopy(page)
    for content in contents:
        paste_content(page, content)
    return page


def paste_content(page, content_box: ContentRectangle):
    assert content_box.content.mode == "RGBA"
    page.paste(content_box.content, (content_box.x1, content_box.y1), content_box.content)
    return page


def get_size_class(rectangle: Rectangle):
    size = get_size(rectangle)
    if size < Size.SMALL.value:
        return Size.SMALL
    elif size < Size.LARGE.value:
        return Size.MEDIUM
    else:
        return Size.LARGE


def get_size(rectangle: Rectangle):
    size = sqrt(area(rectangle))
    return size


class Size(Enum):
    # FIXME: this has to scale with the DPI
    SMALL = 120
    MEDIUM = 180
    LARGE = 300
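A hedged end-to-end sketch of the recursive table generator above (not part of the changeset; the 800x600 size and output path are illustrative, and every run differs because row/column counts, layout and cell contents are randomized):

# Illustrative sketch; relies only on the constructor and generate_random_table()
# defined above. Output varies per run by design.
from synthesis.segment.table.table import RecursiveRandomTable

table = RecursiveRandomTable(0, 0, 800, 600, layout="horizontal", double_rule=True)
table.generate_random_table()
table.content.save("random_table.png")  # RGBA image of the rendered table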
62
synthesis/segment/text_block.py
Normal file
@ -0,0 +1,62 @@
from typing import List

from PIL import Image, ImageDraw, ImageFont
from funcy import first

from cv_analysis.utils.image_operations import superimpose
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.text.text_block_generator.paragraph import ParagraphGenerator
from synthesis.text.font import pick_random_mono_space_font_available_on_system


class TextBlock(ContentRectangle):
    def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
        super().__init__(x1, y1, x2, y2)
        self.font = font or ImageFont.load_default()  # pick_random_font_available_on_system(size=font_size)
        self.text_generator = text_generator or ParagraphGenerator()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
        lines = self.text_generator(rectangle, n_sentences)
        image = write_lines_to_image(lines, rectangle, self.font)
        return self.__put_content(image)

    def put_text(self, text: str, rectangle: Rectangle):
        text_width, text_height = self.font.getsize(text)

        width_delta = text_width - rectangle.width
        height_delta = text_height - rectangle.height

        image = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))

        if width_delta > 0 or height_delta > 0:
            image = image.resize((int(rectangle.width * 0.9), text_height))

        draw = ImageDraw.Draw(image)
        draw.text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
        return self.__put_content(image)

    def __put_content(self, image: Image.Image):
        self.content = image if not self.content else superimpose(self.content, image)
        assert self.content.mode == "RGBA"
        return self


def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
    def write_line(line, line_number):
        draw.text((0, line_number * text_size), line, font=font, fill=(0, 0, 0, 255))

    font = font or pick_random_mono_space_font_available_on_system()

    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
    draw = ImageDraw.Draw(image)
    text_size = draw.textsize(first(lines), font=font)[1]

    for line_number, line in enumerate(lines):
        write_line(line, line_number)

    return image
0
synthesis/text/__init__.py
Normal file
106
synthesis/text/font.py
Normal file
@ -0,0 +1,106 @@
import itertools
from functools import lru_cache
from pathlib import Path
from typing import List

from PIL import Image, ImageDraw, ImageFont
from funcy import lmap, complement, keep, first, lzip, omit, project
from loguru import logger

from synthesis.random import rnd


class RandomFontPicker:
    def __init__(self, font_dir=None, return_default_font=False):
        fonts = get_fonts(font_dir)
        fonts_lower = [font.lower() for font in fonts]
        domestic_fonts_mask = lmap(complement(self.looks_foreign), fonts_lower)
        self.fonts = list(itertools.compress(fonts, domestic_fonts_mask))
        self.fonts_lower = list(itertools.compress(fonts_lower, domestic_fonts_mask))

        self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
        self.draw = ImageDraw.Draw(self.test_image)
        self.return_default_font = return_default_font

    def looks_foreign(self, font):
        # This filters out foreign fonts (e.g. 'Noto Serif Malayalam')
        return len(font.split("-")[0]) > 10

    def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont:  # FIXME: Slow!
        if self.return_default_font:
            return ImageFont.load_default()

        includes = [i.lower() for i in includes] if includes else []
        excludes = [i.lower() for i in excludes] if excludes else []

        logger.debug(f"Picking font by includes={includes} and excludes={excludes}.")

        def includes_pattern(font):
            return not includes or any(include in font for include in includes)

        def excludes_pattern(font):
            return not excludes or not any(exclude in font for exclude in excludes)

        self.shuffle_fonts()

        mask = lmap(lambda f: includes_pattern(f) and excludes_pattern(f), self.fonts_lower)
        fonts = itertools.compress(self.fonts, mask)
        fonts = keep(map(self.load_font, fonts))
        # fonts = filter(self.font_is_renderable, fonts)  # FIXME: this does not work

        font = first(fonts)
        logger.info(f"Using font: {font.getname()}")
        return font

    def shuffle_fonts(self):
        l = lzip(self.fonts, self.fonts_lower)
        rnd.shuffle(l)
        self.fonts, self.fonts_lower = lzip(*l)

    def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
        return self.pick_random_font_available_on_system(includes=["mono"], excludes=["oblique"])

    @lru_cache(maxsize=None)
    def load_font(self, font: str):
        logger.trace(f"Loading font: {font}")
        try:
            return ImageFont.truetype(font, size=11)
        except OSError:
            return None

    @lru_cache(maxsize=None)
    def font_is_renderable(self, font):
        text_size = self.draw.textsize("Test String", font=font)
        return text_size[0] > 0 and text_size[1]


def get_fonts(path: Path = None) -> List[str]:
    path = path or Path("/usr/share/fonts")
    fonts = list(path.rglob("*.ttf"))
    fonts = [font.name for font in fonts]
    return fonts


@lru_cache(maxsize=None)
def get_font_picker(**kwargs):
    return RandomFontPicker(**kwargs, return_default_font=True)


@lru_cache(maxsize=None)
def pick_random_mono_space_font_available_on_system(**kwargs):
    font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
    return font_picker.pick_random_mono_space_font_available_on_system()


@lru_cache(maxsize=None)
def pick_random_font_available_on_system(**kwargs):
    kwargs["excludes"] = (
        *kwargs.get(
            "excludes",
        ),
        "Kinnari",
        "KacstOne",
    )
    font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
    return font_picker.pick_random_font_available_on_system(**project(kwargs, ["includes", "excludes"]))
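A small sketch of the font-picking API above (not part of the changeset; the include/exclude keywords are illustrative substring filters on font file names, and note that the module-level helper goes through get_font_picker, which sets return_default_font=True, so it may short-circuit to PIL's built-in bitmap font):

# Illustrative only; behavior depends on the TrueType fonts installed under
# /usr/share/fonts and on the return_default_font flag described above.
from synthesis.text.font import pick_random_font_available_on_system

font = pick_random_font_available_on_system(includes=("serif",), excludes=("italic", "mono"))
print(font.getname())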
0
synthesis/text/line_formatter/__init__.py
Normal file
9
synthesis/text/line_formatter/identity.py
Normal file
@ -0,0 +1,9 @@
from synthesis.text.line_formatter.line_formatter import LineFormatter


class IdentityLineFormatter(LineFormatter):
    def __init__(self):
        pass

    def __call__(self, lines, last_full):
        return lines, last_full
5
synthesis/text/line_formatter/line_formatter.py
Normal file
@ -0,0 +1,5 @@
import abc


class LineFormatter(abc.ABC):
    pass
41
synthesis/text/line_formatter/paragraph.py
Normal file
@ -0,0 +1,41 @@
from funcy import identity, compose, first, juxt, rest, rcompose

from cv_analysis.utils import star, rconj
from synthesis.random import rnd
from synthesis.text.line_formatter.line_formatter import LineFormatter


class ParagraphLineFormatter(LineFormatter):
    def __init__(self, blank_line_percentage=None):
        self.blank_line_percentage = blank_line_percentage or rnd.uniform(0, 0.5)

    def __call__(self, lines, last_full):
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        def truncate_current_line():
            return rnd.random() < self.blank_line_percentage and last_full

        # This is meant to be read from the bottom up.
        current_line_shall_not_be_a_full_line = truncate_current_line()
        line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
        format_current_line = compose(line_formatter, first)
        move_current_line_to_back = star(rconj)
        split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
        split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
            split_first_line_from_lines_and_format_the_former,
            move_current_line_to_back,
        )
        current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
        # Start reading here and move up.
        return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line

    def format_line(self, line, full=True):
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        n_trailing_words = rnd.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line
26
synthesis/text/text.py
Normal file
@ -0,0 +1,26 @@
import random

from faker import Faker

from synthesis.random import rnd


def generate_random_words(n_min, n_max):
    words = " ".join(Faker().words(rnd.randint(n_min, n_max)))
    return words


def generate_random_number():
    return str(
        round(
            random.choice(
                [
                    random.randint(-10000, 10000),
                    random.uniform(-100, 100),
                ]
            ),
            random.choice(
                [0, 1, 2, 3],
            ),
        )
    )
0
synthesis/text/text_block_generator/__init__.py
Normal file
22
synthesis/text/text_block_generator/caption.py
Normal file
@ -0,0 +1,22 @@
from funcy import first, rest

from cv_analysis.utils import conj
from synthesis.random import rnd
from synthesis.text.text_block_generator.paragraph import generate_random_text_lines
from synthesis.text.text_block_generator.text_block_generator import TextBlockGenerator
from synthesis.text.line_formatter.identity import IdentityLineFormatter


class CaptionGenerator(TextBlockGenerator):
    def __init__(self, caption_start=None):
        self.line_formatter = IdentityLineFormatter()
        self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        first_line_modified = f"{self.caption_start}.: {first(lines)}"
        lines = conj(first_line_modified, rest(lines))
        return lines
36
synthesis/text/text_block_generator/paragraph.py
Normal file
@ -0,0 +1,36 @@
import textwrap
from typing import List

from faker import Faker
from funcy import identity, iterate, take, last

from cv_analysis.utils import star
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd
from synthesis.text.text_block_generator.text_block_generator import TextBlockGenerator
from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter


class ParagraphGenerator(TextBlockGenerator):
    def __init__(self):
        self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5))

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        return lines


def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
    text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
    unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)
    # each iteration of the line formatter function formats one more line and adds it to the back of the list
    formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True))
    # hence do as many iterations as there are lines in the rectangle
    lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator)
    # and then take the lines from the last iteration of the function
    formatted_lines, _ = last(lines_per_iteration)

    return formatted_lines
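A hedged sketch of the paragraph generator above (not part of the changeset; the 80x40 rectangle is illustrative, and it assumes rectangle.width is interpreted as a character count, which is how textwrap.wrap is used in generate_random_text_lines):

# Illustrative only; Rectangle dimensions are invented for the example.
from cv_analysis.utils.rectangle import Rectangle
from synthesis.text.text_block_generator.paragraph import ParagraphGenerator

# Generate Faker text, wrap it to 80-character lines, and randomly truncate
# some lines to imitate paragraph endings.
lines = ParagraphGenerator()(Rectangle(0, 0, 80, 40), n_sentences=50)
print("\n".join(lines[:5]))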
@ -0,0 +1,5 @@
import abc


class TextBlockGenerator(abc.ABC):
    pass
@ -1,6 +1,11 @@
+import warnings
+
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
 pytest_plugins = [
     "test.fixtures.table_parsing",
     "test.fixtures.figure_detection",
+    "test.fixtures.page_generation.page",
 ]
8
test/data/paper/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
/crumpled_paper.jpg
/digital_paper.jpg
/gray_paper.jpg
/rough_grain_paper.jpg
/crumpled.jpg
/digital.jpg
/plain.jpg
/rough_grain.jpg
4
test/data/paper/crumpled.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: d38ebef85a0689bfd047edc98e4a5f93
  size: 14131338
  path: crumpled.jpg
4
test/data/paper/digital.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: 8c4c96efe26731e14dd4a307dad718fd
  size: 108546
  path: digital.jpg
4
test/data/paper/plain.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: 33741812aaff0e54849c5128ae2dccf4
  size: 6924421
  path: plain.jpg
4
test/data/paper/rough_grain.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: eb62925241917d55db05e07851f3f6b9
  size: 1679152
  path: rough_grain.jpg
0
test/fixtures/page_generation/__init__.py
vendored
Normal file
266
test/fixtures/page_generation/page.py
vendored
Normal file
@ -0,0 +1,266 @@
import sys
from typing import Tuple, Iterable, List

import blend_modes
import numpy as np
import pytest
from PIL import Image, ImageEnhance
from PIL.Image import Transpose
from funcy import (
    juxt,
    compose,
    identity,
)
from loguru import logger

from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose
from cv_analysis.utils.rectangle import Rectangle
from synthesis.content_generator import ContentGenerator
from synthesis.partitioner.two_column import TwoColumnPagePartitioner
from synthesis.random import rnd
from synthesis.segment.table.table import paste_contents

logger.remove()
logger.add(sys.stderr, level="INFO")


@pytest.fixture(
    params=[
        # "rough_grain",
        # "plain",
        # "digital",
        "crumpled",
    ]
)
def base_texture(request, size):
    texture = Image.open(TEST_PAGE_TEXTURES_DIR / (request.param + ".jpg"))
    texture = texture.resize(size)
    return texture


@pytest.fixture(
    params=[
        # "portrait",
        "landscape",
    ]
)
def orientation(request):
    return request.param


@pytest.fixture(
    params=[
        # 30,
        100,
    ]
)
def dpi(request):
    return request.param


@pytest.fixture(
    params=[
        # "brown",
        "sepia",
        # "gray",
        # "white",
        # "light_red",
        # "light_blue",
    ]
)
def color_name(request):
    return request.param


@pytest.fixture(
    params=[
        # "smooth",
        # "coarse",
        "neutral",
    ]
)
def texture_name(request):
    return request.param


@pytest.fixture(
    params=[
        # 30,
        70,
        # 150,
    ]
)
def color_intensity(request):
    return request.param


def random_flip(image):
    if rnd.choice([True, False]):
        image = image.transpose(Transpose.FLIP_LEFT_RIGHT)
    if rnd.choice([True, False]):
        image = image.transpose(Transpose.FLIP_TOP_BOTTOM)
    return image


@pytest.fixture
def color(color_name):
    return {
        "brown": "#7d6c5b",
        "sepia": "#b8af88",
        "gray": "#9c9c9c",
        "white": "#ffffff",
        "light_red": "#d68c8b",
        "light_blue": "#8bd6d6",
    }[color_name]


@pytest.fixture
def texture_fn(texture_name, size):
    if texture_name == "smooth":
        fn = blur
    elif texture_name == "coarse":
        fn = compose(overlay, juxt(blur, sharpen))
    else:
        fn = identity

    return normalize_image_function(fn)


def normalize_image_function(func):
    def inner(image):
        image = normalize_image_format_to_array(image)
        image = func(image)
        image = normalize_image_format_to_pil(image)
        return image

    return inner


@pytest.fixture
def texture(tinted_blank_page, base_texture):
    texture = superimpose(base_texture, tinted_blank_page)
    return texture


@pytest.fixture
def tinted_blank_page(size, color, color_intensity):
    tinted_page = Image.new("RGBA", size, color)
    tinted_page.putalpha(color_intensity)
    return tinted_page


@pytest.fixture
def blank_page(size, color, color_intensity):
    page = Image.new("RGBA", size, color=(255, 255, 255, 0))
    return page


@pytest.fixture
def size(dpi, orientation):
    if orientation == "portrait":
        size = (8.5 * dpi, 11 * dpi)
    elif orientation == "landscape":
        size = (11 * dpi, 8.5 * dpi)
    else:
        raise ValueError(f"Unknown orientation: {orientation}")
    size = tuple(map(int, size))
    return size


@pytest.fixture(
    params=[
        TwoColumnPagePartitioner,
        # RandomPagePartitioner
    ]
)
def page_partitioner(request):
    return request.param()


@pytest.fixture
def boxes(page_partitioner, blank_page):
    boxes = page_partitioner(blank_page)
    return boxes


@pytest.fixture
def prepared_texture(texture, texture_fn):
    texture = random_flip(texture)
    texture = texture_fn(texture)
    return texture


@pytest.fixture
def content_boxes(boxes):
    content_generator = ContentGenerator()
    content_boxes = content_generator(boxes)
    return content_boxes


@pytest.fixture
def page_with_opaque_content(
    blank_page, tinted_blank_page, prepared_texture, content_boxes
) -> Tuple[np.ndarray, Iterable[Rectangle]]:
    """Creates a page with content"""
    page = paste_contents(prepared_texture, content_boxes)

    return page, content_boxes


@pytest.fixture
def page_with_translucent_content(
    blank_page, tinted_blank_page, prepared_texture, content_boxes
) -> Tuple[np.ndarray, List[Rectangle]]:
    """Creates a page with content"""
    page_content = paste_contents(blank_page, content_boxes)
    page = blend_by_multiply(page_content, prepared_texture)

    return page, content_boxes


def blend_by_multiply(page_content, texture):
    def to_array(image: Image) -> np.ndarray:
        return np.array(image).astype(np.float32)

    texture.putalpha(255)
    page_content.putalpha(255)
    factor = 1.2
    enhancer = ImageEnhance.Contrast(texture)
    texture = enhancer.enhance(factor)

    page = blend_modes.multiply(
        *map(
            to_array,
            (
                page_content,
                texture,
            ),
        ),
        opacity=1,
    ).astype(np.uint8)
    return page


@pytest.fixture(scope="function")
def random_seeding():
    from synthesis.segment.plot import pick_colormap

    seed = str(rnd.randint(0, 2**32 - 1))
    logger.info(f"Random seed: {seed}")
    rnd.seed(seed)
    pick_colormap.cache_clear()


@pytest.fixture
def page_with_content(
    random_seeding,
    page_with_translucent_content,
    # page_with_opaque_content,
) -> np.ndarray:

    page, boxes = page_with_translucent_content
    # page, boxes = page_with_opaque_content

    return page, boxes
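The fixtures above chain together (size and color feed blank_page and texture, the partitioner yields boxes, the content generator fills them, and blend_by_multiply composes the final page). A hypothetical extra test, shown only as a sketch of how that chain might be exercised; it mirrors test_blank_page in test/page_generation_test.py below and its assertions are assumptions, not part of the changeset:

# Hypothetical sketch; blend_by_multiply above returns a uint8 NumPy array,
# so the channel-count check is a loose assumption rather than a spec.
def test_page_is_uint8_and_boxes_are_nonempty(page_with_content):
    page, boxes = page_with_content
    assert page.dtype.name == "uint8"
    assert page.shape[-1] in (3, 4)
    assert all(box.width > 0 and box.height > 0 for box in boxes)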
2
test/fixtures/server.py
vendored
@ -6,7 +6,7 @@ import cv2
 import pytest
 from funcy import first

-from cv_analysis.utils.structures import Rectangle
+from cv_analysis.utils.rectangle import Rectangle


 @pytest.fixture
6
test/fixtures/table_parsing.py
vendored
@ -9,8 +9,8 @@ from loguru import logger

 from cv_analysis.config import get_config
 from cv_analysis.locations import REPO_ROOT_PATH, TEST_DATA_DVC
-from cv_analysis.utils.draw import draw_rectangles
-from cv_analysis.utils.open_pdf import open_pdf
+from cv_analysis.utils.drawing import draw_rectangles
+from cv_analysis.utils.input import open_analysis_input_file
 from test.fixtures.figure_detection import paste_text

 CV_CONFIG = get_config()
@ -19,7 +19,7 @@ CV_CONFIG = get_config()
 @pytest.fixture
 def client_page_with_table(test_file_index, dvc_test_data):
     img_path = join(CV_CONFIG.test_data_dir, f"test{test_file_index}.png")
-    return first(open_pdf(img_path))
+    return first(open_analysis_input_file(img_path))


 @pytest.fixture(scope="session")
19
test/page_generation_test.py
Normal file
@ -0,0 +1,19 @@
from typing import Iterable

from PIL.Image import Image

from cv_analysis.utils.display import show_image
from cv_analysis.utils.rectangle import Rectangle


def test_blank_page(page_with_content):
    page, boxes = page_with_content

    draw_boxes(page, boxes)


def draw_boxes(page: Image, boxes: Iterable[Rectangle]):
    from cv_analysis.utils.drawing import draw_rectangles

    page = draw_rectangles(page, boxes, filled=False, annotate=True)
    show_image(page, backend="pil")
@ -3,6 +3,7 @@ from math import prod
 import cv2
 import pytest

+from cv_analysis.utils.spacial import area
 from test.utils.utils import powerset


@ -15,21 +16,20 @@ class TestFindPrimaryTextRegions:

     @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
     def test_page_without_text_yields_figures(self, figure_detection_pipeline, page_with_images, image_size):
-        results = figure_detection_pipeline(page_with_images)
-        result_figures_size = map(lambda x: (x.w, x.h), results)
+        result_rectangles = figure_detection_pipeline(page_with_images)
+        result_figure_sizes = map(lambda r: (r.width, r.height), result_rectangles)

-        assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figures_size])
+        assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figure_sizes])

     @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
     @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
     @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
     @pytest.mark.parametrize("error_tolerance", [0.025])
     def test_page_with_only_text_yields_no_figures(self, figure_detection_pipeline, page_with_text, error_tolerance):
-        results = figure_detection_pipeline(page_with_text)
-
-        result_figures_area = sum(map(lambda x: (x.w * x.h), results))
+        result_rectangles = figure_detection_pipeline(page_with_text)
+        result_figure_areas = sum(map(area, result_rectangles))
         page_area = prod(page_with_text.shape)
-        error = result_figures_area / page_area
+        error = result_figure_areas / page_area

         assert error <= error_tolerance

@ -45,11 +45,11 @@ class TestFindPrimaryTextRegions:
         image_size,
         error_tolerance,
     ):
-        results = list(figure_detection_pipeline(page_with_images_and_text))
+        result_rectangles = list(figure_detection_pipeline(page_with_images_and_text))

-        result_figures_area = sum(map(lambda x: (x.w * x.h), results))
+        result_figure_areas = sum(map(area, result_rectangles))
         expected_figure_area = prod(image_size)

-        error = abs(result_figures_area - expected_figure_area) / expected_figure_area
+        error = abs(result_figure_areas - expected_figure_area) / expected_figure_area

         assert error <= error_tolerance
0
test/unit_tests/layout_parsing_test.py
Normal file
@ -3,12 +3,11 @@ import numpy as np
 import pytest

 from cv_analysis.server.pipeline import table_parsing_formatter, figure_detection_formatter, make_analysis_pipeline
-from cv_analysis.utils.structures import Rectangle
+from cv_analysis.utils.rectangle import Rectangle


 def analysis_fn_mock(image: np.ndarray):
-    bbox = (0, 0, 42, 42)
-    return [Rectangle.from_xyxy(bbox)]
+    return [Rectangle(0, 0, 42, 42)]


 @pytest.fixture
@ -2,9 +2,12 @@ from itertools import starmap

 import cv2
 import pytest
+from funcy import lmap, compose, zipdict

 from cv_analysis.table_parsing import parse_tables
-from cv_analysis.utils.test_metrics import compute_document_score
+from cv_analysis.utils import lift
+from cv_analysis.utils.rectangle import Rectangle
+from cv_analysis.utils.metrics import compute_document_score


 @pytest.mark.parametrize("score_threshold", [0.95])
@ -12,8 +15,9 @@ from cv_analysis.utils.test_metrics import compute_document_score
 def test_table_parsing_on_client_pages(
     score_threshold, client_page_with_table, expected_table_annotation, test_file_index
 ):
-    result = [x.json_xywh() for x in parse_tables(client_page_with_table)]
-    formatted_result = {"pages": [{"page": str(test_file_index), "cells": result}]}
+    results = compose(lift(rectangle_to_dict), parse_tables)(client_page_with_table)
+    formatted_result = {"pages": [{"cells": results}]}

     score = compute_document_score(formatted_result, expected_table_annotation)

@ -25,6 +29,14 @@ def error_tolerance(line_thickness):
     return line_thickness * 7


+def rectangle_to_dict(rectangle: Rectangle):
+    return zipdict(["x", "y", "width", "height"], rectangle_to_xywh(rectangle))
+
+
+def rectangle_to_xywh(rectangle: Rectangle):
+    return rectangle.x1, rectangle.y1, abs(rectangle.x1 - rectangle.x2), abs(rectangle.y1 - rectangle.y2)
+
+
 @pytest.mark.parametrize("line_thickness", [1, 2, 3])
 @pytest.mark.parametrize("line_type", [cv2.LINE_4, cv2.LINE_AA, cv2.LINE_8])
 @pytest.mark.parametrize("table_style", ["closed horizontal vertical", "open horizontal vertical"])
@ -32,7 +44,7 @@ def error_tolerance(line_thickness):
 @pytest.mark.parametrize("background_color", [255, 220])
 @pytest.mark.parametrize("table_shape", [(5, 8)])
 def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with_table, error_tolerance):
-    result = [x.xywh() for x in parse_tables(page_with_table)]
+    result = lmap(rectangle_to_xywh, parse_tables(page_with_table))
     assert (
         result == expected_gold_page_with_table
         or average_error(result, expected_gold_page_with_table) <= error_tolerance
@ -46,8 +58,8 @@ def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with
 @pytest.mark.parametrize("background_color", [255, 220])
 @pytest.mark.parametrize("table_shape", [(5, 8)])
 @pytest.mark.xfail
-def test_bad_qual_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
-    result = [x.xywh() for x in parse_tables(page_with_patchy_table)]
+def test_low_quality_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
+    result = lmap(rectangle_to_xywh, parse_tables(page_with_patchy_table))
     assert (
         result == expected_gold_page_with_table
         or average_error(result, expected_gold_page_with_table) <= error_tolerance