diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index f16dbff..21e8d0f 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -5,7 +5,7 @@ import albumentations as A import cv2 as cv import numpy as np import pytest -from PIL import Image, ImageOps, ImageDraw +from PIL import Image, ImageOps from PIL.Image import Transpose Image_t = Union[Image.Image, np.ndarray] @@ -325,13 +325,26 @@ def blank_page(texture, texture_fn) -> np.ndarray: class ContentBoxGenerator: def __init__(self): - self.margin_percentage = 0.2 - self.max_depth = 5 - self.recursion_probability = 0.5 + self.left_margin_percentage = 0.05 + self.right_margin_percentage = 0.05 + self.top_margin_percentage = 0.1 + self.bottom_margin_percentage = 0.1 - def __call__(self, page: Image_t) -> List[Rectangle]: - box = Rectangle(0, 0, *page.size) + self.margin_percentage = 0.005 + self.max_depth = 3 + self.initial_recursion_probability = 1 + self.recursion_probability_decay = 0.1 + + def __call__(self, page: Image.Image) -> List[Rectangle]: + left_margin = int(page.width * self.left_margin_percentage) + right_margin = int(page.width * self.right_margin_percentage) + top_margin = int(page.height * self.top_margin_percentage) + bottom_margin = int(page.height * self.bottom_margin_percentage) + + box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin) boxes = lflatten(self.generate_content_boxes(box)) + boxes = self.drop_small_boxes(boxes, *page.size) + # boxes = merge_related_rectangles(boxes) return boxes def draw_boxes(self, page: Image, boxes: Iterable[Rectangle]): @@ -343,7 +356,7 @@ class ContentBoxGenerator: yield box else: child_boxes = self.generate_random_child_boxes(box) - if self.recurse(): + if self.recurse(depth): yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes) else: yield child_boxes @@ -351,22 +364,43 @@ class ContentBoxGenerator: def generate_random_child_boxes(self, box: Rectangle) -> Tuple[Rectangle, Rectangle]: axis = random.choice(["x", "y"]) - point, edge = (box.x1, box.width) if axis == "x" else (box.y1, box.height) - split_coordinate = random.uniform(self.margin_percentage, 1 - self.margin_percentage) * edge + point + edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height) + split_coordinate = random.uniform(0.3, 0.7) * edge_length + edge_anchor_point child_boxes = self.get_child_boxes(box, split_coordinate, axis) return child_boxes def get_child_boxes(self, box: Rectangle, split_coordinate, axis) -> Tuple[Rectangle, Rectangle]: + def low(p): + return p * (1 + self.margin_percentage) + + def high(p): + return p * (1 - self.margin_percentage) + if axis == "x": return ( - Rectangle(box.x1, box.y1, split_coordinate, box.y2), - Rectangle(split_coordinate, box.y1, box.x2, box.y2), + Rectangle(low(box.x1), low(box.y1), high(split_coordinate), high(box.y2)), + Rectangle(low(split_coordinate), low(box.y1), high(box.x2), high(box.y2)), ) else: return ( - Rectangle(box.x1, box.y1, box.x2, split_coordinate), - Rectangle(box.x1, split_coordinate, box.x2, box.y2), + Rectangle(low(box.x1), low(box.y1), high(box.x2), high(split_coordinate)), + Rectangle(low(box.x1), low(split_coordinate), high(box.x2), high(box.y2)), ) - def recurse(self): - return random.random() < self.recursion_probability + def recurse(self, depth): + p = self.recursion_probability(depth) + return random.random() <= self.recursion_probability(depth) + + def recursion_probability(self, depth): + return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth + + def drop_small_boxes( + self, + boxes: Iterable[Rectangle], + page_width, + page_height, + min_percentage=0.13, + ) -> List[Rectangle]: + min_width = page_width * min_percentage + min_height = page_height * min_percentage + return [b for b in boxes if b.width > min_width and b.height > min_height]