Complete page partitioning into empty boxes

Completed logic for partitioning page into content boxes. Next step is
to fill content boxes with random content.
This commit is contained in:
Matthias Bisping 2023-01-16 13:32:37 +01:00
parent 29741fc5da
commit 5bc1550eae

View File

@ -5,7 +5,7 @@ import albumentations as A
import cv2 as cv
import numpy as np
import pytest
from PIL import Image, ImageOps, ImageDraw
from PIL import Image, ImageOps
from PIL.Image import Transpose
Image_t = Union[Image.Image, np.ndarray]
@ -325,13 +325,26 @@ def blank_page(texture, texture_fn) -> np.ndarray:
class ContentBoxGenerator:
def __init__(self):
self.margin_percentage = 0.2
self.max_depth = 5
self.recursion_probability = 0.5
self.left_margin_percentage = 0.05
self.right_margin_percentage = 0.05
self.top_margin_percentage = 0.1
self.bottom_margin_percentage = 0.1
def __call__(self, page: Image_t) -> List[Rectangle]:
box = Rectangle(0, 0, *page.size)
self.margin_percentage = 0.005
self.max_depth = 3
self.initial_recursion_probability = 1
self.recursion_probability_decay = 0.1
def __call__(self, page: Image.Image) -> List[Rectangle]:
left_margin = int(page.width * self.left_margin_percentage)
right_margin = int(page.width * self.right_margin_percentage)
top_margin = int(page.height * self.top_margin_percentage)
bottom_margin = int(page.height * self.bottom_margin_percentage)
box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin)
boxes = lflatten(self.generate_content_boxes(box))
boxes = self.drop_small_boxes(boxes, *page.size)
# boxes = merge_related_rectangles(boxes)
return boxes
def draw_boxes(self, page: Image, boxes: Iterable[Rectangle]):
@ -343,7 +356,7 @@ class ContentBoxGenerator:
yield box
else:
child_boxes = self.generate_random_child_boxes(box)
if self.recurse():
if self.recurse(depth):
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
else:
yield child_boxes
@ -351,22 +364,43 @@ class ContentBoxGenerator:
def generate_random_child_boxes(self, box: Rectangle) -> Tuple[Rectangle, Rectangle]:
axis = random.choice(["x", "y"])
point, edge = (box.x1, box.width) if axis == "x" else (box.y1, box.height)
split_coordinate = random.uniform(self.margin_percentage, 1 - self.margin_percentage) * edge + point
edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height)
split_coordinate = random.uniform(0.3, 0.7) * edge_length + edge_anchor_point
child_boxes = self.get_child_boxes(box, split_coordinate, axis)
return child_boxes
def get_child_boxes(self, box: Rectangle, split_coordinate, axis) -> Tuple[Rectangle, Rectangle]:
def low(p):
return p * (1 + self.margin_percentage)
def high(p):
return p * (1 - self.margin_percentage)
if axis == "x":
return (
Rectangle(box.x1, box.y1, split_coordinate, box.y2),
Rectangle(split_coordinate, box.y1, box.x2, box.y2),
Rectangle(low(box.x1), low(box.y1), high(split_coordinate), high(box.y2)),
Rectangle(low(split_coordinate), low(box.y1), high(box.x2), high(box.y2)),
)
else:
return (
Rectangle(box.x1, box.y1, box.x2, split_coordinate),
Rectangle(box.x1, split_coordinate, box.x2, box.y2),
Rectangle(low(box.x1), low(box.y1), high(box.x2), high(split_coordinate)),
Rectangle(low(box.x1), low(split_coordinate), high(box.x2), high(box.y2)),
)
def recurse(self):
return random.random() < self.recursion_probability
def recurse(self, depth):
    """Randomly decide whether the boxes at ``depth`` should be split again.

    Args:
        depth: Current recursion depth; passed on to
            ``self.recursion_probability`` to obtain the threshold.

    Returns:
        True when a fresh draw from ``random.random()`` falls below the
        probability computed for ``depth``.
    """
    # random.random() yields values in [0.0, 1.0), so a strict comparison
    # always succeeds for a probability of 1 and never for a probability of 0.
    return random.random() < self.recursion_probability(depth)
def recursion_probability(self, depth):
    """Return the probability of recursing at the given depth.

    The result is ``self.initial_recursion_probability`` scaled by
    ``(1 - self.recursion_probability_decay)`` raised to ``depth``.
    """
    retention_per_level = 1 - self.recursion_probability_decay
    decay_factor = retention_per_level ** depth
    return self.initial_recursion_probability * decay_factor
def drop_small_boxes(
    self,
    boxes: Iterable[Rectangle],
    page_width,
    page_height,
    min_percentage=0.13,
) -> List[Rectangle]:
    """Filter out boxes that are too small relative to the page.

    Args:
        boxes: Candidate boxes; each must expose ``width`` and ``height``.
        page_width: Page width the width threshold is derived from.
        page_height: Page height the height threshold is derived from.
        min_percentage: Fraction (0.13 means 13 %) of the page dimension
            that a box must strictly exceed in both width and height.

    Returns:
        A list of the boxes that exceed both thresholds, in input order.
    """
    width_threshold = page_width * min_percentage
    height_threshold = page_height * min_percentage

    kept = []
    for candidate in boxes:
        if not candidate.width > width_threshold:
            continue
        if not candidate.height > height_threshold:
            continue
        kept.append(candidate)
    return kept