import abc
from typing import List, Tuple

from PIL import Image
from funcy import lflatten

from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd


class PagePartitioner(abc.ABC):
    """Abstract base for strategies that carve a page into content boxes.

    Calling an instance with a page image trims the outer page margins and
    hands the remaining rectangle to ``generate_content_boxes``, which
    subclasses implement. The (possibly nested) result is flattened into a
    single list.
    """

    # TODO: produce boxes for page numbers, headers and footers
    def __init__(self):
        # Outer page margins, as fractions of the page width / height.
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1

        # Margin factor forwarded to ``get_child_boxes`` on every split.
        self.recursive_margin_percentage = 0.007
        # Not read anywhere in this file; presumably consulted by subclasses.
        self.max_recursion_depth = 3
        # Probability of recursing at depth 0 and its per-level decay
        # (see ``recursion_probability``).
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Return the flat list of content boxes for ``page``.

        The margins are truncated to whole pixels before the content area is
        built from them.
        """
        page_width, page_height = page.width, page.height

        inset_left = int(page_width * self.left_margin_percentage)
        inset_right = int(page_width * self.right_margin_percentage)
        inset_top = int(page_height * self.top_margin_percentage)
        inset_bottom = int(page_height * self.bottom_margin_percentage)

        # Rectangle arguments are presumably (x1, y1, x2, y2) -- confirm
        # against cv_analysis.utils.rectangle.
        content_area = Rectangle(
            inset_left,
            inset_top,
            page_width - inset_right,
            page_height - inset_bottom,
        )
        return lflatten(self.generate_content_boxes(content_area))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Partition ``box`` into content boxes; implemented by subclasses.

        The return value is passed through ``lflatten`` by ``__call__``, so it
        may be a nested structure of boxes.
        """
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis`` (``"x"`` or ``"y"``).

        ``split_percentage`` is the fraction of the box's extent along
        ``axis``, measured from ``box.x1`` / ``box.y1``, at which the split is
        placed. The two halves are produced by ``get_child_boxes`` using
        ``self.recursive_margin_percentage``.
        """
        assert axis in ("x", "y")

        if axis == "x":
            origin = box.x1
            extent = box.width
        else:
            origin = box.y1
            extent = box.height

        split_at = split_percentage * extent + origin
        return get_child_boxes(box, split_at, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Randomly decide whether to recurse further at ``depth``.

        Returns True when a draw from ``rnd.random()`` does not exceed
        ``recursion_probability(depth)``.
        """
        # Draw first so the random source is consumed before the threshold is
        # computed.
        draw = rnd.random()
        return draw <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        """Return the recursion probability at ``depth``.

        Computed as ``initial_recursion_probability * (1 - decay) ** depth``.
        """
        start = self.initial_recursion_probability
        keep_fraction = 1 - self.recursion_probability_decay
        return start * keep_fraction ** depth


def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Split ``box`` at ``split_coordinate`` along ``axis`` into two boxes.

    For ``axis == "x"`` the result is (left, right); otherwise it is
    (top, bottom). Every lower edge is multiplied by ``1 + margin_percentage``
    and every upper edge by ``1 - margin_percentage``; this applies to the
    outer edges of ``box`` as well as to the two edges created at the split.

    NOTE(review): the margin scales the absolute coordinate rather than the
    box size, so the resulting gap grows with distance from the origin and is
    zero for a coordinate of 0 -- confirm this is intended.
    """
    assert axis in ("x", "y")

    grow = 1 + margin_percentage
    shrink = 1 - margin_percentage

    # Outer edges of the parent box after applying the margin factors.
    x_min = box.x1 * grow
    y_min = box.y1 * grow
    x_max = box.x2 * shrink
    y_max = box.y2 * shrink

    # The split line becomes the upper edge of the first child and the lower
    # edge of the second child.
    split_as_upper = split_coordinate * shrink
    split_as_lower = split_coordinate * grow

    if axis == "x":
        first = Rectangle(x_min, y_min, split_as_upper, y_max)
        second = Rectangle(split_as_lower, y_min, x_max, y_max)
    else:
        first = Rectangle(x_min, y_min, x_max, split_as_upper)
        second = Rectangle(x_min, split_as_lower, x_max, y_max)
    return first, second