From 4ec3429dec932cadd828376610950b8ad84a51f4 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Wed, 1 Feb 2023 16:51:03 +0100 Subject: [PATCH] Refactoring: Move page partitioner into its own module --- synthesis/partitioner/__init__.py | 0 synthesis/partitioner/page_partitioner.py | 71 ++++++++++++++++++++++ synthesis/random.py | 8 +++ test/fixtures/page_generation/page.py | 73 +---------------------- 4 files changed, 81 insertions(+), 71 deletions(-) create mode 100644 synthesis/partitioner/__init__.py create mode 100644 synthesis/partitioner/page_partitioner.py create mode 100644 synthesis/random.py diff --git a/synthesis/partitioner/__init__.py b/synthesis/partitioner/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/synthesis/partitioner/page_partitioner.py b/synthesis/partitioner/page_partitioner.py new file mode 100644 index 0000000..aea30e6 --- /dev/null +++ b/synthesis/partitioner/page_partitioner.py @@ -0,0 +1,71 @@ +import abc +from typing import List, Tuple + +from PIL import Image +from funcy import lflatten + +from cv_analysis.utils.rectangle import Rectangle +from synthesis.random import rnd + + +class PagePartitioner(abc.ABC): + # TODO: produce boxes for page numbers, headers and footers + def __init__(self): + self.left_margin_percentage = 0.05 + self.right_margin_percentage = 0.05 + self.top_margin_percentage = 0.1 + self.bottom_margin_percentage = 0.1 + + self.recursive_margin_percentage = 0.007 + self.max_recursion_depth = 3 + self.initial_recursion_probability = 1 + self.recursion_probability_decay = 0.1 + + def __call__(self, page: Image.Image) -> List[Rectangle]: + left_margin = int(page.width * self.left_margin_percentage) + right_margin = int(page.width * self.right_margin_percentage) + top_margin = int(page.height * self.top_margin_percentage) + bottom_margin = int(page.height * self.bottom_margin_percentage) + + box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin) + 
boxes = lflatten(self.generate_content_boxes(box)) + return boxes + + @abc.abstractmethod + def generate_content_boxes(self, box: Rectangle, depth=0): + raise NotImplementedError + + def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]: + assert axis in ["x", "y"] + + edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height) + split_coordinate = split_percentage * edge_length + edge_anchor_point + child_boxes = get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage) + return child_boxes + + def recurse(self, depth): + return rnd.random() <= self.recursion_probability(depth) + + def recursion_probability(self, depth): + return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth + + +def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]: + assert axis in ["x", "y"] + + def low(point_1d): + return point_1d * (1 + margin_percentage) + + def high(point_1d): + return point_1d * (1 - margin_percentage) + + if axis == "x": + return ( + Rectangle(low(box.x1), low(box.y1), high(split_coordinate), high(box.y2)), + Rectangle(low(split_coordinate), low(box.y1), high(box.x2), high(box.y2)), + ) + else: + return ( + Rectangle(low(box.x1), low(box.y1), high(box.x2), high(split_coordinate)), + Rectangle(low(box.x1), low(split_coordinate), high(box.x2), high(box.y2)), + ) diff --git a/synthesis/random.py b/synthesis/random.py new file mode 100644 index 0000000..c28c206 --- /dev/null +++ b/synthesis/random.py @@ -0,0 +1,8 @@ +import random + +from loguru import logger + +random_seed = random.randint(0, 2**32 - 1) + +logger.info(f"Random seed: {random_seed}") +rnd = random.Random(random_seed) diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index 55e253f..4bc2a76 100644 --- a/test/fixtures/page_generation/page.py +++ 
b/test/fixtures/page_generation/page.py @@ -27,23 +27,18 @@ from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpo from cv_analysis.utils.merging import merge_related_rectangles from cv_analysis.utils.postprocessing import remove_overlapping, remove_included from cv_analysis.utils.spacial import area +from synthesis.partitioner.page_partitioner import PagePartitioner +from synthesis.random import rnd from synthesis.segment.content_rectangle import ContentRectangle logger.remove() logger.add(sys.stderr, level="INFO") -random_seed = random.randint(0, 2**32 - 1) -# random_seed = 2973413116 -# random_seed = 2212357755 - -rnd = random.Random(random_seed) -logger.info(f"Random seed: {random_seed}") from funcy import ( juxt, compose, identity, - lflatten, lmap, first, iterate, @@ -197,7 +192,6 @@ def tinted_blank_page(size, color, color_intensity): @pytest.fixture def blank_page(size, color, color_intensity): - rnd.seed(random_seed) page = Image.new("RGBA", size, color=(255, 255, 255, 0)) return page @@ -1192,48 +1186,6 @@ def paste_contents(page, contents: Iterable[ContentRectangle]): return page -# TODO: produce boxes for page numbers, headers and footers -class PagePartitioner(abc.ABC): - def __init__(self): - self.left_margin_percentage = 0.05 - self.right_margin_percentage = 0.05 - self.top_margin_percentage = 0.1 - self.bottom_margin_percentage = 0.1 - - self.recursive_margin_percentage = 0.007 - self.max_recursion_depth = 3 - self.initial_recursion_probability = 1 - self.recursion_probability_decay = 0.1 - - def __call__(self, page: Image.Image) -> List[Rectangle]: - left_margin = int(page.width * self.left_margin_percentage) - right_margin = int(page.width * self.right_margin_percentage) - top_margin = int(page.height * self.top_margin_percentage) - bottom_margin = int(page.height * self.bottom_margin_percentage) - - box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin) - boxes = 
lflatten(self.generate_content_boxes(box)) - return boxes - - @abc.abstractmethod - def generate_content_boxes(self, box: Rectangle, depth=0): - raise NotImplementedError - - def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]: - assert axis in ["x", "y"] - - edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height) - split_coordinate = split_percentage * edge_length + edge_anchor_point - child_boxes = get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage) - return child_boxes - - def recurse(self, depth): - return rnd.random() <= self.recursion_probability(depth) - - def recursion_probability(self, depth): - return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth - - class RandomPagePartitioner(PagePartitioner): def __init__(self): super().__init__() @@ -1275,27 +1227,6 @@ class TwoColumnPagePartitioner(PagePartitioner): yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes) -def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]: - assert axis in ["x", "y"] - - def low(point_1d): - return point_1d * (1 + margin_percentage) - - def high(point_1d): - return point_1d * (1 - margin_percentage) - - if axis == "x": - return ( - Rectangle(low(box.x1), low(box.y1), high(split_coordinate), high(box.y2)), - Rectangle(low(split_coordinate), low(box.y1), high(box.x2), high(box.y2)), - ) - else: - return ( - Rectangle(low(box.x1), low(box.y1), high(box.x2), high(split_coordinate)), - Rectangle(low(box.x1), low(split_coordinate), high(box.x2), high(box.y2)), - ) - - @pytest.fixture( params=[ TwoColumnPagePartitioner,