Refactoring: Move

Move page partitioner into its own module
This commit is contained in:
Matthias Bisping 2023-02-01 16:51:03 +01:00
parent bdcb2f1bef
commit 4ec3429dec
4 changed files with 81 additions and 71 deletions

View File

View File

@ -0,0 +1,71 @@
import abc
from typing import List, Tuple
from PIL import Image
from funcy import lflatten
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd
class PagePartitioner(abc.ABC):
    """Abstract base for callables that split a page into content rectangles.

    Calling an instance with a page image computes the area inside the page
    margins, hands it to ``generate_content_boxes`` (implemented by
    subclasses) and returns the flattened result as a list.
    """

    # TODO: produce boxes for page numbers, headers and footers
    def __init__(self):
        # Outer margins: left/right are fractions of the page width,
        # top/bottom are fractions of the page height (see ``__call__``).
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1
        # Forwarded to ``get_child_boxes`` when a box is split in two.
        self.recursive_margin_percentage = 0.007
        # Not read by this base class; presumably consulted by subclasses
        # when deciding whether to split further -- TODO confirm.
        self.max_recursion_depth = 3
        # Inputs of ``recursion_probability``.
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Return the flat list of content boxes generated for ``page``.

        Each margin is truncated to an integer with ``int()`` before it is
        applied to the page dimensions.
        """
        page_width, page_height = page.width, page.height
        x1 = int(page_width * self.left_margin_percentage)
        y1 = int(page_height * self.top_margin_percentage)
        x2 = page_width - int(page_width * self.right_margin_percentage)
        y2 = page_height - int(page_height * self.bottom_margin_percentage)
        content_area = Rectangle(x1, y1, x2, y2)
        # Subclasses may yield nested iterables of boxes; flatten to a list.
        return lflatten(self.generate_content_boxes(content_area))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Produce the content boxes for ``box``; must be overridden."""
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis`` ("x" or "y").

        ``split_percentage`` is the fraction of the box's extent along that
        axis, measured from ``box.x1`` / ``box.y1``, at which the split is
        placed. The actual rectangles are built by ``get_child_boxes`` using
        ``self.recursive_margin_percentage``.
        """
        assert axis in ["x", "y"]
        if axis == "x":
            origin, extent = box.x1, box.width
        else:
            origin, extent = box.y1, box.height
        split_coordinate = split_percentage * extent + origin
        return get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Randomly decide whether to recurse at ``depth``.

        Draws one number from the shared ``rnd`` generator and returns True
        when it does not exceed ``recursion_probability(depth)``.
        """
        draw = rnd.random()
        threshold = self.recursion_probability(depth)
        return draw <= threshold

    def recursion_probability(self, depth):
        """Return ``initial_recursion_probability * (1 - decay) ** depth``."""
        keep_factor = 1 - self.recursion_probability_decay
        return self.initial_recursion_probability * keep_factor ** depth
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Split ``box`` at ``split_coordinate`` into two rectangles.

    For ``axis == "x"`` the split coordinate is used as an x value and the
    two rectangles share the y range; for ``axis == "y"`` it is used as a y
    value and they share the x range. Every lower coordinate
    (x1 / y1 / start of the second rectangle) is multiplied by
    ``1 + margin_percentage`` and every upper coordinate (x2 / y2 / end of
    the first rectangle) by ``1 - margin_percentage``.

    NOTE(review): the factors scale the absolute coordinates, not the box
    size, so the resulting inset depends on the box's position -- confirm
    this is intended.
    """
    assert axis in ["x", "y"]

    lower_factor = 1 + margin_percentage
    upper_factor = 1 - margin_percentage

    x_start, y_start = box.x1 * lower_factor, box.y1 * lower_factor
    x_end, y_end = box.x2 * upper_factor, box.y2 * upper_factor
    # The first rectangle ends just before the split, the second starts just after.
    first_end = split_coordinate * upper_factor
    second_start = split_coordinate * lower_factor

    if axis != "x":
        return (
            Rectangle(x_start, y_start, x_end, first_end),
            Rectangle(x_start, second_start, x_end, y_end),
        )
    return (
        Rectangle(x_start, y_start, first_end, y_end),
        Rectangle(second_start, y_start, x_end, y_end),
    )

8
synthesis/random.py Normal file
View File

@ -0,0 +1,8 @@
import random
from loguru import logger
# Draw a fresh seed in [0, 2**32 - 1] on import and build the shared generator
# from it; the seed is logged so that a run can be reproduced afterwards.
random_seed = random.randrange(2**32)
rnd = random.Random(random_seed)
logger.info(f"Random seed: {random_seed}")

View File

@ -27,23 +27,18 @@ from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpo
from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
from cv_analysis.utils.spacial import area
from synthesis.partitioner.page_partitioner import PagePartitioner
from synthesis.random import rnd
from synthesis.segment.content_rectangle import ContentRectangle
# Drop the logger's existing handlers and log to stderr at INFO level instead.
logger.remove()
logger.add(sys.stderr, level="INFO")
# Seed for this module's random generator, drawn from [0, 2**32 - 1].
random_seed = random.randint(0, 2**32 - 1)
# Fixed seeds, presumably kept to reproduce specific earlier runs -- TODO confirm.
# random_seed = 2973413116
# random_seed = 2212357755
# Dedicated generator instance seeded with the value above.
rnd = random.Random(random_seed)
# Log the seed so the run can be reproduced.
logger.info(f"Random seed: {random_seed}")
from funcy import (
juxt,
compose,
identity,
lflatten,
lmap,
first,
iterate,
@ -197,7 +192,6 @@ def tinted_blank_page(size, color, color_intensity):
@pytest.fixture
def blank_page(size, color, color_intensity):
    """Fixture returning a new RGBA image of ``size`` filled with (255, 255, 255, 0).

    ``color`` and ``color_intensity`` are requested as fixtures but are not
    used in the body. The shared ``rnd`` generator is re-seeded with
    ``random_seed`` before the image is created.
    """
    rnd.seed(random_seed)
    fill = (255, 255, 255, 0)
    return Image.new("RGBA", size, color=fill)
@ -1192,48 +1186,6 @@ def paste_contents(page, contents: Iterable[ContentRectangle]):
return page
# TODO: produce boxes for page numbers, headers and footers
class PagePartitioner(abc.ABC):
    """Abstract base for callables that split a page into content rectangles.

    Calling an instance with a page image computes the area inside the page
    margins, hands it to ``generate_content_boxes`` (implemented by
    subclasses) and returns the flattened result as a list.
    """

    def __init__(self):
        # Outer margins: left/right are fractions of the page width,
        # top/bottom are fractions of the page height (see ``__call__``).
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1
        # Forwarded to ``get_child_boxes`` when a box is split in two.
        self.recursive_margin_percentage = 0.007
        # Not read by this base class; presumably consulted by subclasses
        # when deciding whether to split further -- TODO confirm.
        self.max_recursion_depth = 3
        # Inputs of ``recursion_probability``.
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Return the flat list of content boxes generated for ``page``.

        Each margin is truncated to an integer with ``int()`` before it is
        applied to the page dimensions.
        """
        page_width, page_height = page.width, page.height
        x1 = int(page_width * self.left_margin_percentage)
        y1 = int(page_height * self.top_margin_percentage)
        x2 = page_width - int(page_width * self.right_margin_percentage)
        y2 = page_height - int(page_height * self.bottom_margin_percentage)
        content_area = Rectangle(x1, y1, x2, y2)
        # Subclasses may yield nested iterables of boxes; flatten to a list.
        return lflatten(self.generate_content_boxes(content_area))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Produce the content boxes for ``box``; must be overridden."""
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis`` ("x" or "y").

        ``split_percentage`` is the fraction of the box's extent along that
        axis, measured from ``box.x1`` / ``box.y1``, at which the split is
        placed. The actual rectangles are built by ``get_child_boxes`` using
        ``self.recursive_margin_percentage``.
        """
        assert axis in ["x", "y"]
        if axis == "x":
            origin, extent = box.x1, box.width
        else:
            origin, extent = box.y1, box.height
        split_coordinate = split_percentage * extent + origin
        return get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Randomly decide whether to recurse at ``depth``.

        Draws one number from the ``rnd`` generator and returns True when it
        does not exceed ``recursion_probability(depth)``.
        """
        draw = rnd.random()
        threshold = self.recursion_probability(depth)
        return draw <= threshold

    def recursion_probability(self, depth):
        """Return ``initial_recursion_probability * (1 - decay) ** depth``."""
        keep_factor = 1 - self.recursion_probability_decay
        return self.initial_recursion_probability * keep_factor ** depth
class RandomPagePartitioner(PagePartitioner):
def __init__(self):
super().__init__()
@ -1275,27 +1227,6 @@ class TwoColumnPagePartitioner(PagePartitioner):
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Split ``box`` at ``split_coordinate`` into two rectangles.

    For ``axis == "x"`` the split coordinate is used as an x value and the
    two rectangles share the y range; for ``axis == "y"`` it is used as a y
    value and they share the x range. Every lower coordinate
    (x1 / y1 / start of the second rectangle) is multiplied by
    ``1 + margin_percentage`` and every upper coordinate (x2 / y2 / end of
    the first rectangle) by ``1 - margin_percentage``.

    NOTE(review): the factors scale the absolute coordinates, not the box
    size, so the resulting inset depends on the box's position -- confirm
    this is intended.
    """
    assert axis in ["x", "y"]

    lower_factor = 1 + margin_percentage
    upper_factor = 1 - margin_percentage

    x_start, y_start = box.x1 * lower_factor, box.y1 * lower_factor
    x_end, y_end = box.x2 * upper_factor, box.y2 * upper_factor
    # The first rectangle ends just before the split, the second starts just after.
    first_end = split_coordinate * upper_factor
    second_start = split_coordinate * lower_factor

    if axis != "x":
        return (
            Rectangle(x_start, y_start, x_end, first_end),
            Rectangle(x_start, second_start, x_end, y_end),
        )
    return (
        Rectangle(x_start, y_start, first_end, y_end),
        Rectangle(second_start, y_start, x_end, y_end),
    )
@pytest.fixture(
params=[
TwoColumnPagePartitioner,