Refactoring: Move
Move page partitioner into its own module
This commit is contained in:
parent
bdcb2f1bef
commit
4ec3429dec
0
synthesis/partitioner/__init__.py
Normal file
0
synthesis/partitioner/__init__.py
Normal file
71
synthesis/partitioner/page_partitioner.py
Normal file
71
synthesis/partitioner/page_partitioner.py
Normal file
@ -0,0 +1,71 @@
|
||||
import abc
|
||||
from typing import List, Tuple
|
||||
|
||||
from PIL import Image
|
||||
from funcy import lflatten
|
||||
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.random import rnd
|
||||
|
||||
|
||||
class PagePartitioner(abc.ABC):
    """Abstract base for strategies that split a page into content rectangles.

    Calling an instance insets the page by the configured outer margins and
    hands the resulting rectangle to ``generate_content_boxes``; whatever that
    method produces (possibly nested) is flattened into a single list.
    """

    # TODO: produce boxes for page numbers, headers and footers
    def __init__(self):
        # Outer page margins, as fractions of the page width / height.
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1

        # Margin factor forwarded to get_child_boxes whenever a box is split.
        self.recursive_margin_percentage = 0.007
        # Not read by this base class; presumably enforced by subclasses.
        self.max_recursion_depth = 3
        # Inputs of recursion_probability(): start value and per-level decay.
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Partition ``page`` and return the flattened list of content boxes."""
        width, height = page.width, page.height

        # Margins are truncated to whole pixels before being applied.
        x_min = int(width * self.left_margin_percentage)
        x_max = width - int(width * self.right_margin_percentage)
        y_min = int(height * self.top_margin_percentage)
        y_max = height - int(height * self.bottom_margin_percentage)

        content_area = Rectangle(x_min, y_min, x_max, y_max)
        return lflatten(self.generate_content_boxes(content_area))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Produce the content boxes for ``box``; must be supplied by subclasses."""
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis`` ("x" or "y").

        ``split_percentage`` is the position of the cut, measured as a fraction
        of the box's extent along ``axis`` starting from its x1 / y1 edge.
        """
        assert axis in ["x", "y"]

        if axis == "x":
            origin, extent = box.x1, box.width
        else:
            origin, extent = box.y1, box.height

        cut = split_percentage * extent + origin
        return get_child_boxes(box, cut, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Randomly decide whether to recurse at ``depth``, using the shared ``rnd``."""
        draw = rnd.random()
        return draw <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        """Return the initial probability scaled by ``(1 - decay) ** depth``."""
        retention = 1 - self.recursion_probability_decay
        return self.initial_recursion_probability * retention ** depth
|
||||
|
||||
|
||||
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Cut ``box`` at ``split_coordinate`` along ``axis`` and return both halves.

    Every lower coordinate (x1 / y1 / start of the second half) is multiplied
    by ``1 + margin_percentage`` and every upper coordinate (x2 / y2 / end of
    the first half) by ``1 - margin_percentage``. Note that the factor scales
    the coordinate value itself, so the resulting gap grows with the
    coordinate's magnitude rather than with the box size.
    """
    assert axis in ["x", "y"]

    grow = 1 + margin_percentage
    shrink = 1 - margin_percentage

    left, top = box.x1 * grow, box.y1 * grow
    right, bottom = box.x2 * shrink, box.y2 * shrink
    first_end = split_coordinate * shrink  # upper edge of the first child
    second_start = split_coordinate * grow  # lower edge of the second child

    if axis == "x":
        first = Rectangle(left, top, first_end, bottom)
        second = Rectangle(second_start, top, right, bottom)
    else:
        first = Rectangle(left, top, right, first_end)
        second = Rectangle(left, second_start, right, bottom)

    return first, second
|
||||
8
synthesis/random.py
Normal file
8
synthesis/random.py
Normal file
@ -0,0 +1,8 @@
|
||||
import random
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Draw a fresh 32-bit seed on import and build the shared generator from it.
random_seed = random.randint(0, 2**32 - 1)
rnd = random.Random(random_seed)

# The seed is logged, presumably so that a given run can be reproduced later.
logger.info(f"Random seed: {random_seed}")
|
||||
73
test/fixtures/page_generation/page.py
vendored
73
test/fixtures/page_generation/page.py
vendored
@ -27,23 +27,18 @@ from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpo
|
||||
from cv_analysis.utils.merging import merge_related_rectangles
|
||||
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
||||
from cv_analysis.utils.spacial import area
|
||||
from synthesis.partitioner.page_partitioner import PagePartitioner
|
||||
from synthesis.random import rnd
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
|
||||
# Seed for this module's random generator. The commented-out values are
# presumably fixed seeds kept around for reproducing specific runs.
random_seed = random.randint(0, 2**32 - 1)
# random_seed = 2973413116
# random_seed = 2212357755
rnd = random.Random(random_seed)

# Reset the logger's sinks and log to stderr at INFO level
# (presumably loguru's logger -- confirm against the import).
logger.remove()
logger.add(sys.stderr, level="INFO")
logger.info(f"Random seed: {random_seed}")
|
||||
|
||||
from funcy import (
|
||||
juxt,
|
||||
compose,
|
||||
identity,
|
||||
lflatten,
|
||||
lmap,
|
||||
first,
|
||||
iterate,
|
||||
@ -197,7 +192,6 @@ def tinted_blank_page(size, color, color_intensity):
|
||||
|
||||
@pytest.fixture
def blank_page(size, color, color_intensity):
    """Return a new RGBA image of ``size`` filled with (255, 255, 255, 0).

    ``color`` and ``color_intensity`` are accepted but not used in the body.
    The module-level ``rnd`` generator is re-seeded with ``random_seed`` each
    time this fixture is built.
    """
    rnd.seed(random_seed)
    return Image.new("RGBA", size, color=(255, 255, 255, 0))
|
||||
|
||||
@ -1192,48 +1186,6 @@ def paste_contents(page, contents: Iterable[ContentRectangle]):
|
||||
return page
|
||||
|
||||
|
||||
# TODO: produce boxes for page numbers, headers and footers
|
||||
class PagePartitioner(abc.ABC):
    """Abstract base for strategies that split a page into content rectangles.

    Calling an instance insets the page by the configured outer margins and
    hands the resulting rectangle to ``generate_content_boxes``; whatever that
    method produces (possibly nested) is flattened into a single list.
    """

    def __init__(self):
        # Outer page margins, as fractions of the page width / height.
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1

        # Margin factor forwarded to get_child_boxes whenever a box is split.
        self.recursive_margin_percentage = 0.007
        # Not read by this base class; presumably enforced by subclasses.
        self.max_recursion_depth = 3
        # Inputs of recursion_probability(): start value and per-level decay.
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Partition ``page`` and return the flattened list of content boxes."""
        width, height = page.width, page.height

        # Margins are truncated to whole pixels before being applied.
        x_min = int(width * self.left_margin_percentage)
        x_max = width - int(width * self.right_margin_percentage)
        y_min = int(height * self.top_margin_percentage)
        y_max = height - int(height * self.bottom_margin_percentage)

        content_area = Rectangle(x_min, y_min, x_max, y_max)
        return lflatten(self.generate_content_boxes(content_area))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Produce the content boxes for ``box``; must be supplied by subclasses."""
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis`` ("x" or "y").

        ``split_percentage`` is the position of the cut, measured as a fraction
        of the box's extent along ``axis`` starting from its x1 / y1 edge.
        """
        assert axis in ["x", "y"]

        if axis == "x":
            origin, extent = box.x1, box.width
        else:
            origin, extent = box.y1, box.height

        cut = split_percentage * extent + origin
        return get_child_boxes(box, cut, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Randomly decide whether to recurse at ``depth``, using the module's ``rnd``."""
        draw = rnd.random()
        return draw <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        """Return the initial probability scaled by ``(1 - decay) ** depth``."""
        retention = 1 - self.recursion_probability_decay
        return self.initial_recursion_probability * retention ** depth
|
||||
|
||||
|
||||
class RandomPagePartitioner(PagePartitioner):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
@ -1275,27 +1227,6 @@ class TwoColumnPagePartitioner(PagePartitioner):
|
||||
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
|
||||
|
||||
|
||||
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Cut ``box`` at ``split_coordinate`` along ``axis`` and return both halves.

    Every lower coordinate (x1 / y1 / start of the second half) is multiplied
    by ``1 + margin_percentage`` and every upper coordinate (x2 / y2 / end of
    the first half) by ``1 - margin_percentage``. Note that the factor scales
    the coordinate value itself, so the resulting gap grows with the
    coordinate's magnitude rather than with the box size.
    """
    assert axis in ["x", "y"]

    grow = 1 + margin_percentage
    shrink = 1 - margin_percentage

    left, top = box.x1 * grow, box.y1 * grow
    right, bottom = box.x2 * shrink, box.y2 * shrink
    first_end = split_coordinate * shrink  # upper edge of the first child
    second_start = split_coordinate * grow  # lower edge of the second child

    if axis == "x":
        first = Rectangle(left, top, first_end, bottom)
        second = Rectangle(second_start, top, right, bottom)
    else:
        first = Rectangle(left, top, right, first_end)
        second = Rectangle(left, second_start, right, bottom)

    return first, second
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
TwoColumnPagePartitioner,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user