72 lines
2.7 KiB
Python
72 lines
2.7 KiB
Python
import abc
|
|
from typing import List, Tuple
|
|
|
|
from PIL import Image
|
|
from funcy import lflatten
|
|
|
|
from cv_analysis.utils.rectangle import Rectangle
|
|
from synthesis.randomization import rnd
|
|
|
|
|
|
class PagePartitioner(abc.ABC):
    """Abstract base for strategies that carve a page into content boxes.

    Calling an instance with a page image strips the outer page margins and
    hands the remaining area to ``generate_content_boxes``, which concrete
    subclasses must implement. The helpers ``generate_child_boxes``,
    ``recurse`` and ``recursion_probability`` are building blocks for those
    implementations.
    """

    # TODO: produce boxes for page numbers, headers and footers

    def __init__(self):
        """Set the default layout and recursion parameters."""
        # Outer page margins, as fractions of the page width / height.
        self.left_margin_percentage = self.right_margin_percentage = 0.05
        self.top_margin_percentage = self.bottom_margin_percentage = 0.1

        # Margin factor forwarded to get_child_boxes when a box is split.
        self.recursive_margin_percentage = 0.007

        # Not read anywhere in this class; presumably consulted by
        # subclasses to bound their recursion -- verify against them.
        self.max_recursion_depth = 3

        # Parameters of the geometric decay used by recursion_probability.
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Partition ``page`` and return the resulting boxes as a flat list.

        Args:
            page: Image whose ``width`` and ``height`` define the page size.

        Returns:
            The flattened output of ``generate_content_boxes`` applied to the
            page area that remains after removing the outer margins.
        """
        page_w, page_h = page.width, page.height

        # Margins are truncated to whole pixels.
        inset_left = int(page_w * self.left_margin_percentage)
        inset_right = int(page_w * self.right_margin_percentage)
        inset_top = int(page_h * self.top_margin_percentage)
        inset_bottom = int(page_h * self.bottom_margin_percentage)

        content_area = Rectangle(inset_left, inset_top, page_w - inset_right, page_h - inset_bottom)

        # Subclasses may return arbitrarily nested sequences; flatten them.
        return lflatten(self.generate_content_boxes(content_area))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Produce the content boxes for ``box``; must be overridden.

        Args:
            box: Area to be partitioned.
            depth: Current recursion depth, ``0`` for the top-level call.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis``.

        Args:
            box: Rectangle to split.
            axis: ``"x"`` to split along the width, ``"y"`` along the height.
            split_percentage: Position of the split as a fraction of the
                box's extent along ``axis``, measured from ``x1`` / ``y1``.

        Returns:
            The pair of rectangles produced by ``get_child_boxes``.
        """
        assert axis in ["x", "y"]

        if axis == "x":
            origin, extent = box.x1, box.width
        else:
            origin, extent = box.y1, box.height

        # Absolute coordinate of the cut along the chosen axis.
        cut = split_percentage * extent + origin
        return get_child_boxes(box, cut, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Randomly decide whether to recurse at ``depth``.

        Draws one value from ``rnd.random()`` (presumably uniform on [0, 1) --
        confirm against ``synthesis.randomization``) and returns ``True`` when
        it does not exceed ``recursion_probability(depth)``.
        """
        draw = rnd.random()
        threshold = self.recursion_probability(depth)
        return draw <= threshold

    def recursion_probability(self, depth):
        """Return the recursion probability at ``depth``.

        Computed as ``initial_recursion_probability * (1 - decay) ** depth``,
        i.e. the initial probability shrinks geometrically with depth.
        """
        retention = 1 - self.recursion_probability_decay
        return self.initial_recursion_probability * retention ** depth
|
|
|
|
|
|
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Split ``box`` at ``split_coordinate`` into two margin-adjusted children.

    Every lower edge (``x1``, ``y1`` and the second child's split edge) is
    multiplied by ``1 + margin_percentage``; every upper edge (``x2``, ``y2``
    and the first child's split edge) is multiplied by
    ``1 - margin_percentage``.

    NOTE(review): the margin scales the absolute coordinate, not the box
    size, so the resulting inset grows with distance from the origin and a
    coordinate of 0 is left unchanged -- confirm this is intended.

    Args:
        box: Rectangle to split; its ``x1``, ``y1``, ``x2``, ``y2`` are read.
        split_coordinate: Absolute coordinate of the cut along ``axis``.
        axis: ``"x"`` for a left/right pair, ``"y"`` for a top/bottom pair
            (assuming the usual image convention of y growing downwards).
        margin_percentage: Fractional scale applied to each edge.

    Returns:
        ``(first, second)`` where ``first`` is the child on the low side of
        the cut and ``second`` the child on the high side.
    """
    assert axis in ["x", "y"]

    grow = 1 + margin_percentage
    shrink = 1 - margin_percentage

    # Outer edges of the parent box after the margin adjustment.
    near_x, near_y = box.x1 * grow, box.y1 * grow
    far_x, far_y = box.x2 * shrink, box.y2 * shrink

    # The cut becomes the upper edge of the first child and the lower edge
    # of the second one.
    cut_as_upper = split_coordinate * shrink
    cut_as_lower = split_coordinate * grow

    if axis == "x":
        first = Rectangle(near_x, near_y, cut_as_upper, far_y)
        second = Rectangle(cut_as_lower, near_y, far_x, far_y)
    else:
        first = Rectangle(near_x, near_y, far_x, cut_as_upper)
        second = Rectangle(near_x, cut_as_lower, far_x, far_y)

    return first, second
|