Fix box clashes

Rewrote box generation sequence and eliminated issue with gaps /
overlapping boxes
This commit is contained in:
Matthias Bisping 2023-01-24 12:07:46 +01:00
parent c5ba489931
commit e715c86f8d

View File

@ -380,7 +380,12 @@ def blank_page(texture, texture_fn) -> np.ndarray:
"""Creates a blank page with a given orientation and dpi."""
page = random_flip(texture)
page = texture_fn(page)
page_partitioner = RandomPagePartitioner()
page_partitioner = random.choice(
[
TwoColumnPagePartitioner(),
RandomPagePartitioner(),
]
)
boxes = page_partitioner(page)
content_generator = ContentGenerator()
boxes = content_generator(boxes)
@ -414,19 +419,18 @@ class ContentGenerator:
figure_boxes = lfilter(is_square_like, figure_boxes)
text_boxes = merge_related_rectangles(text_boxes)
text_boxes = lmap(generate_random_text_block, every_nth(2, text_boxes))
tables_1, tables_1_captions = zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:]))
# TODO: Refactor: Figures should be their own class
plots, plot_captions = zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes))
tables_2, tables_2_captions = zipmap(
generate_recursive_random_table_with_caption, every_nth(2, figure_boxes[1:])
boxes = list(
itertools.chain(
map(generate_random_text_block, every_nth(2, text_boxes)),
*zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
*zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
*zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
)
)
boxes = text_boxes + plots + plot_captions + tables_1 + tables_1_captions + tables_2 + tables_2_captions
boxes = remove_included(boxes)
boxes = remove_overlapping(boxes)
if self.constrain_layouts:
boxes = remove_included(boxes)
boxes = remove_overlapping(boxes)
return boxes
@ -770,7 +774,7 @@ def dump_plt_to_image(rectangle):
class RandomFontPicker:
def __init__(self, font_dir=None, size=None):
def __init__(self, font_dir=None, return_default_font=False):
fonts = get_fonts(font_dir)
fonts_lower = [font.lower() for font in fonts]
domestic_fonts_mask = lmap(complement(self.looks_foreign), fonts_lower)
@ -779,6 +783,7 @@ class RandomFontPicker:
self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
self.draw = ImageDraw.Draw(self.test_image)
self.return_default_font = return_default_font
def looks_foreign(self, font):
# This filters out foreign fonts (e.g. 'Noto Serif Malayalam')
@ -786,6 +791,9 @@ class RandomFontPicker:
def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont: # FIXME: Slow!
if self.return_default_font:
return ImageFont.load_default()
includes = [i.lower() for i in includes] if includes else []
excludes = [i.lower() for i in excludes] if excludes else []
@ -839,7 +847,7 @@ def get_fonts(path: Path = None) -> List[str]:
@lru_cache(maxsize=None)
def get_font_picker(**kwargs):
return RandomFontPicker(**kwargs)
return RandomFontPicker(**kwargs, return_default_font=True)
@lru_cache(maxsize=None)
@ -1199,9 +1207,13 @@ class PagePartitioner(abc.ABC):
def generate_content_boxes(self, box: Rectangle, depth=0):
raise NotImplementedError
@abc.abstractmethod
def generate_child_boxes(self, box: Rectangle) -> Tuple[Rectangle, Rectangle]:
pass
def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
    """Split ``box`` into two child boxes along ``axis``.

    Args:
        box: The rectangle to split.
        axis: Either ``"x"`` or ``"y"``; selects which edge of ``box`` the
            split position is measured along (``x1``/``width`` for ``"x"``,
            ``y1``/``height`` for ``"y"``).
        split_percentage: Fraction of the chosen edge length at which the
            split is placed, measured from the edge's anchor coordinate.

    Returns:
        Whatever ``get_child_boxes`` produces for the computed split
        coordinate and ``self.recursive_margin_percentage`` (annotated as a
        pair of rectangles).
    """
    assert axis in ["x", "y"]

    # Pick the start coordinate and extent of the edge the split runs along.
    if axis == "x":
        origin, extent = box.x1, box.width
    else:
        origin, extent = box.y1, box.height

    # Turn the relative split position into an absolute coordinate.
    split_coordinate = split_percentage * extent + origin
    return get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)
def recurse(self, depth):
    """Randomly decide whether partitioning should descend another level.

    Draws a uniform sample from ``random.random()`` and compares it with
    ``self.recursion_probability(depth)``; returns ``True`` when the sample
    does not exceed that probability.
    """
    # Draw first, then query the probability, to keep the evaluation order.
    sample = random.random()
    return sample <= self.recursion_probability(depth)
@ -1218,19 +1230,40 @@ class RandomPagePartitioner(PagePartitioner):
if depth >= self.max_recursion_depth:
yield box
else:
child_boxes = self.generate_child_boxes(box)
child_boxes = self.generate_child_boxes(
box,
axis=random.choice(["x", "y"]),
split_percentage=random.uniform(0.3, 0.7),
)
if self.recurse(depth):
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
else:
yield child_boxes
def generate_child_boxes(self, box: Rectangle) -> Tuple[Rectangle, Rectangle]:
axis = random.choice(["x", "y"])
edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height)
split_coordinate = random.uniform(0.3, 0.7) * edge_length + edge_anchor_point
child_boxes = get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)
return child_boxes
class TwoColumnPagePartitioner(PagePartitioner):
    """Partitioner that splits a page into two columns, then splits each column vertically.

    The first split (depth 0) is along the x axis at 50 %. Every deeper split
    is along the y axis at either 30 % or 70 %, chosen at random. Recursion
    always descends until ``max_recursion_depth`` is reached.
    """

    def __init__(self):
        super().__init__()
        # Only the recursion depth is set here; the margin settings below were
        # left disabled by the author.
        # self.recursive_margin_percentage = 0.1
        # self.left_margin_percentage = 0.1
        # self.right_margin_percentage = 0.1
        self.max_recursion_depth = 3

    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Recursively partition ``box``.

        At the maximum depth, ``box`` itself is yielded. Otherwise one nested
        generator per child box is yielded (not the flattened boxes), so the
        result is a nested structure; presumably the caller flattens it.
        """
        if depth >= self.max_recursion_depth:
            yield box
            return

        if depth == 0:
            # Top level: cut the page into two equally wide columns.
            axis, split_percentage = "x", 0.5
        else:
            # Inside a column: cut horizontally at one of two fixed ratios.
            axis, split_percentage = "y", random.choice([0.3, 0.7])

        children = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)
        for child in children:
            yield self.generate_content_boxes(child, depth + 1)
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]: