Matthias Bisping fd76933b5a Refactoring: Move
Move image operations such as blurring into their own module.
2023-02-01 16:16:18 +01:00

1462 lines
46 KiB
Python

import abc
import io
import itertools
import random
import sys
import textwrap
from copy import deepcopy
from enum import Enum
from functools import lru_cache, partial
from math import sqrt
from pathlib import Path
from typing import Tuple, Iterable, List
import blend_modes
import numpy as np
import pytest
from PIL import Image, ImageFont, ImageDraw, ImageEnhance
from PIL.Image import Transpose
from faker import Faker
from loguru import logger
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
from cv_analysis.utils import star, rconj, conj
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
from cv_analysis.utils.image_operations import blur, sharpen, overlay
from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
from cv_analysis.utils.spacial import area
from synthesis.segment.content_rectangle import ContentRectangle
# Reconfigure loguru: drop the currently registered handlers and log to stderr at INFO level.
logger.remove()
logger.add(sys.stderr, level="INFO")
# A fresh seed is drawn first and then immediately overridden by a pinned value, so runs are
# reproducible. Comment out the pinned assignment to get a different (logged) seed per run.
random_seed = random.randint(0, 2**32 - 1)
random_seed = 2973413116
# random_seed = 2212357755
# Module-wide seeded RNG; helpers in this file draw from `rnd`.
rnd = random.Random(random_seed)
logger.info(f"Random seed: {random_seed}")
from funcy import (
juxt,
compose,
identity,
lflatten,
lmap,
first,
iterate,
take,
last,
rest,
rcompose,
lsplit,
lfilter,
lzip,
keep,
repeatedly,
mapcat,
omit,
project,
complement,
lremove,
chunks,
)
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
from cv_analysis.utils.display import show_image
from cv_analysis.utils.rectangle import Rectangle
@pytest.fixture(
    params=[
        # "rough_grain",
        # "plain",
        # "digital",
        "crumpled",
    ]
)
def base_texture(request, size):
    """Load the parametrized page texture (a JPEG in TEST_PAGE_TEXTURES_DIR) scaled to `size`."""
    texture_path = TEST_PAGE_TEXTURES_DIR / (request.param + ".jpg")
    resized = Image.open(texture_path).resize(size)
    # resized.putalpha(255)  # ISSUE 1!!!
    return resized
@pytest.fixture(
    params=[
        "portrait",
        # "landscape",
    ]
)
def orientation(request):
    """Page orientation under test; consumed by the `size` fixture."""
    selected_orientation = request.param
    return selected_orientation
@pytest.fixture(
    params=[
        # 30,
        100,
    ]
)
def dpi(request):
    """Resolution factor under test; the `size` fixture multiplies the page dimensions by it."""
    selected_dpi = request.param
    return selected_dpi
@pytest.fixture(
    params=[
        # "brown",
        # "sepia",
        # "gray",
        "white",
        # "light_red",
        # "light_blue",
    ]
)
def color_name(request):
    """Name of the page tint under test; resolved to a hex code by the `color` fixture."""
    selected_color_name = request.param
    return selected_color_name
@pytest.fixture(
    params=[
        # "smooth",
        # "coarse",
        "neutral",
    ]
)
def texture_name(request):
    """Name of the texture treatment under test; mapped to a function by `texture_fn`."""
    selected_texture_name = request.param
    return selected_texture_name
@pytest.fixture(
    params=[
        # 30,
        70,
        # 150,
    ]
)
def color_intensity(request):
    """Alpha value that `tinted_blank_page` applies to the tinted page."""
    selected_intensity = request.param
    return selected_intensity
def random_flip(image):
    """Mirror `image` horizontally and/or vertically, each decided by an independent draw from `rnd`."""
    # The horizontal decision is drawn first, then the vertical one; keep this order so the
    # seeded random stream is consumed identically.
    for flip in (Transpose.FLIP_LEFT_RIGHT, Transpose.FLIP_TOP_BOTTOM):
        if rnd.choice([True, False]):
            image = image.transpose(flip)
    return image
@pytest.fixture
def color(color_name):
    """Translate a tint name into its hex color code (KeyError for unknown names)."""
    palette = {
        "brown": "#7d6c5b",
        "sepia": "#b8af88",
        "gray": "#9c9c9c",
        "white": "#ffffff",
        "light_red": "#d68c8b",
        "light_blue": "#8bd6d6",
    }
    return palette[color_name]
@pytest.fixture
def texture_fn(texture_name, size):
    """Return the image function for `texture_name`, wrapped to take and return PIL images.

    "smooth" blurs, "coarse" overlays a blurred and a sharpened version, and any other name
    leaves the image unchanged.
    """
    if texture_name == "smooth":
        return normalize_image_function(blur)
    if texture_name == "coarse":
        return normalize_image_function(compose(overlay, juxt(blur, sharpen)))
    return normalize_image_function(identity)
def normalize_image_function(func):
    """Wrap `func` so that its input is converted to an array and its output back to a PIL image."""

    def inner(image):
        as_array = normalize_image_format_to_array(image)
        return normalize_image_format_to_pil(func(as_array))

    return inner
@pytest.fixture
def texture(tinted_blank_page, base_texture):
    """Base texture with the semi-transparent tinted page pasted on top of it."""
    return superimpose_texture_with_transparency(base_texture, tinted_blank_page)
@pytest.fixture
def tinted_blank_page(size, color, color_intensity):
    """RGBA page filled with `color` whose alpha channel is set to `color_intensity`."""
    page = Image.new("RGBA", size, color)
    page.putalpha(color_intensity)
    return page
@pytest.fixture
def blank_page(size, color, color_intensity):
    """Fully transparent white RGBA page of the requested size.

    Also re-seeds the module-wide `rnd` with `random_seed`. The `color` and `color_intensity`
    arguments are requested but not used in the body.
    """
    rnd.seed(random_seed)
    transparent_white = (255, 255, 255, 0)
    return Image.new("RGBA", size, color=transparent_white)
@pytest.fixture
def size(dpi, orientation):
    """Page size as an integer `(width, height)` tuple: 8.5 x 11 units scaled by `dpi`.

    Raises:
        ValueError: If `orientation` is neither "portrait" nor "landscape".
    """
    if orientation not in ("portrait", "landscape"):
        raise ValueError(f"Unknown orientation: {orientation}")
    width_units, height_units = (8.5, 11) if orientation == "portrait" else (11, 8.5)
    return int(width_units * dpi), int(height_units * dpi)
def superimpose_texture_with_transparency(
    page: Image,
    texture: Image,
    crop_to_content=True,
    pad=True,
) -> Image:
    """Paste an RGBA texture onto a page, using the texture itself as the paste mask.

    TODO: Rename page and texture to something more generic.

    Args:
        page: The image that is pasted onto.
        texture: The image that is pasted; must be in mode "RGBA" once it has been fitted.
        crop_to_content: If True, the texture is first cropped to the bounding box reported by
            `texture.getbbox()`.
        pad: Selects how a size mismatch is resolved: True pads the texture to the page size,
            False resizes (stretches) it to the page size.

    Returns:
        The page image (after format normalization) with the texture pasted at (0, 0).
    """
    page = normalize_image_format_to_pil(page)
    texture = normalize_image_format_to_pil(texture)

    if crop_to_content:
        content_box = texture.getbbox()
        texture = texture.crop(content_box)

    sizes_differ = page.size != texture.size
    if sizes_differ:
        logger.trace(f"Size of page and texture do not match: {page.size} != {texture.size}")
        if not pad:
            logger.trace(f"Resizing texture before pasting to fit size {page.size}")
            texture = texture.resize(page.size)
        else:
            logger.trace(f"Padding texture before pasting to fit size {page.size}")
            texture = pad_image_to_size(texture, page.size)

    assert page.size == texture.size
    assert texture.mode == "RGBA"

    # The third argument is the mask: the texture's own alpha decides what is copied onto the page.
    page.paste(texture, (0, 0), texture)
    return page
def pad_image_to_size(image: Image, size: Tuple[int, int]) -> Image:
    """Center `image` on a new canvas of the given size.

    Returns `image` itself when it already has the target size.

    Raises:
        ValueError: If the image exceeds the target size in either dimension.
    """
    if image.size == size:
        return image

    image_width, image_height = image.size[0], image.size[1]
    if image_width > size[0] or image_height > size[1]:
        raise ValueError(f"Image size {image.size} is larger than target size {size}.")

    canvas = Image.new(image.mode, size, color=255)
    offset = compute_pasting_coordinates(image, canvas)
    assert image.mode == "RGBA"
    canvas.paste(image, offset)
    return canvas
def compute_pasting_coordinates(smaller: Image, larger: Image.Image):
    """Return the (x, y) offset at which `smaller` sits centered on `larger`."""
    horizontal_slack = abs(larger.width - smaller.width)
    vertical_slack = abs(larger.height - smaller.height)
    return horizontal_slack // 2, vertical_slack // 2
def to_array(image: Image) -> np.ndarray:
    """Convert a PIL image (or anything `np.array` accepts) into a float32 numpy array."""
    pixels = np.array(image)
    return pixels.astype(np.float32)
class ContentGenerator:
    """Callable that fills layout boxes with random text blocks, tables, plots and captions."""

    def __init__(self):
        # When True, related boxes are merged before content generation and the generated
        # boxes are passed through remove_included / remove_overlapping afterwards.
        self.constrain_layouts = True

    def __call__(self, boxes: List[Rectangle]) -> List[Rectangle]:
        """Generate content for the given layout boxes.

        Square-like boxes become figure candidates (tables and plots, each with a caption); the
        rest become text candidates (alternating between text blocks and captioned tables).

        Args:
            boxes: Layout rectangles to fill. The caller's list is not modified.

        Returns:
            The list of generated content boxes.
        """
        # Shuffle a copy: shuffling in place would silently reorder the caller's list.
        boxes = list(boxes)
        rnd.shuffle(boxes)

        figure_boxes, text_boxes = lsplit(is_square_like, boxes)

        if self.constrain_layouts:
            figure_boxes = merge_related_rectangles(figure_boxes)
            # Merging can change aspect ratios, so re-apply the square-like filter.
            figure_boxes = lfilter(is_square_like, figure_boxes)
            text_boxes = merge_related_rectangles(text_boxes)

        # Even-indexed / odd-indexed boxes alternate between the two content kinds of each group.
        # zipmap yields two lists (figure part and caption part) that are unpacked into the chain.
        boxes = list(
            itertools.chain(
                map(generate_random_text_block, every_nth(2, text_boxes)),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
                *zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
            )
        )

        if self.constrain_layouts:
            boxes = remove_included(boxes)
            boxes = remove_overlapping(boxes)

        return boxes
def zipmap(fn, boxes, n=2):
    """Apply `fn` to every box and yield the results transposed, one list per result position.

    If the transposition has fewer than `n` lists (e.g. `boxes` is empty), `n` empty lists are
    yielded instead, so callers can always unpack at least `n` sequences.
    """
    transposed = [list(position) for position in zip(*map(fn, boxes))]
    if len(transposed) < n:
        yield from ([] for _ in range(n))
    else:
        yield from transposed
def is_square_like(box: Rectangle):
    """True when neither side of the box is more than twice as long as the other."""
    if not box.width / box.height > 0.5:
        return False
    return box.height / box.width > 0.5
def is_wide(box: Rectangle):
    """True when the box is more than 1.5 times as wide as it is tall."""
    width_to_height = box.width / box.height
    return width_to_height > 1.5
def is_tall(box: Rectangle):
    """True when the box is more than 1.5 times as tall as it is wide."""
    height_to_width = box.height / box.width
    return height_to_width > 1.5
def every_nth(n, iterable):
    """Lazily yield every `n`-th item of `iterable`, starting with the first one."""
    start, stop = 0, None
    return itertools.islice(iterable, start, stop, n)
def generate_random_plot_with_caption(rectangle: Rectangle):
    """Split `rectangle` into a figure part and a caption part and fill them with a plot and a caption.

    Returns:
        Tuple `(plot_block, caption_block)`.
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    # The plot is generated before the caption; both consume the shared random stream.
    plot_block = generate_random_plot(figure_area)
    caption_block = generate_random_image_caption(caption_area)
    return plot_block, caption_block
# TODO: deduplicate with generate_random_table_with_caption
def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    """Split `rectangle` into a figure part and a caption part and fill them with a table and a caption.

    Whether the table gets double rules is decided by `probably()`.

    Returns:
        Tuple `(table_block, caption_block)`.
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    use_double_rule = probably()
    table_block = generate_recursive_random_table(figure_area, double_rule=use_double_rule)
    caption_block = generate_random_table_caption(caption_area)
    return table_block, caption_block
def split_into_figure_and_caption(rectangle: Rectangle):
    """Cut `rectangle` horizontally into an upper figure box and a lower caption box.

    The cut lies at a random 50-90 % of the height, and a random gap of up to 3 % of the
    height, centered on the cut, is left between the two boxes.

    Returns:
        Tuple `(figure_box, caption_box)`.
    """
    # Draw order matters for reproducibility: gap first, split point second.
    gap_percentage = rnd.uniform(0, 0.03)
    split_point = rnd.uniform(0.5, 0.9)

    figure_bottom = rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
    caption_top = rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2)

    figure_box = Rectangle(rectangle.x1, rectangle.y1, rectangle.x2, figure_bottom)
    caption_box = Rectangle(rectangle.x1, caption_top, rectangle.x2, rectangle.y2)
    return figure_box, caption_box
def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
    """Create a RandomPlot covering `rectangle` and render a random plot into it.

    Existing content is carried over when `rectangle` is a ContentRectangle.
    """
    plot = RandomPlot(*rectangle.coords)
    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        plot.content = rectangle.content
    else:
        plot.content = None
    plot.generate_random_plot(rectangle)
    return plot
def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRectangle:
    """Create a RecursiveRandomTable covering `rectangle` and generate its cells.

    Args:
        rectangle: Area the table occupies.
        **kwargs: Forwarded to the RecursiveRandomTable constructor.
    """
    table = RecursiveRandomTable(*rectangle.coords, **kwargs)
    if isinstance(rectangle, RecursiveRandomTable):
        # TODO: Refactor
        table.content = rectangle.content or None
    table.generate_random_table()
    return table
# TODO: Refactor
def generate_random_page_number(page: Image) -> ContentRectangle:
    """Create a RandomPageNumber block spanning the whole page and render a page number onto it."""
    full_page = Rectangle(0, 0, page.width, page.height)
    number_block = RandomPageNumber(*full_page.coords)
    number_block.content = page  # TODO: Refactor
    number_block.generate_random_page_number(full_page)
    return number_block
@lru_cache(maxsize=None)
def get_random_seed():
    """Draw a 32-bit seed from `rnd`.

    The function takes no arguments and is cached, so the seed is drawn on the first call only
    and every later call returns that same value.
    """
    largest_seed = 2**32 - 1
    return rnd.randint(0, largest_seed)
class RandomContentRectangle(ContentRectangle):
    """ContentRectangle that carries its own seeded random number generator."""

    def __init__(self, x1, y1, x2, y2, content=None, seed=None):
        """Initialize the rectangle and its private RNG.

        Args:
            x1, y1, x2, y2: Rectangle coordinates, forwarded to ContentRectangle.
            content: Optional initial content, forwarded to ContentRectangle.
            seed: Seed for the instance RNG; when None, the cached module-wide seed from
                `get_random_seed()` is used.
        """
        super().__init__(x1, y1, x2, y2, content)
        # Compare against None explicitly: `seed or ...` would discard an explicit seed of 0.
        self.seed = seed if seed is not None else get_random_seed()
        self.random = random.Random(self.seed)
class Size(Enum):
    """Size thresholds that are compared against `get_size()` (square root of a rectangle's area)."""

    # NOTE(review): get_size_class() only consults SMALL and LARGE, while the cell-size checks in
    # RecursiveRandomTable only consult SMALL and MEDIUM - confirm which cut-offs are intended.
    SMALL = 120
    MEDIUM = 180
    LARGE = 300
def get_size_class(rectangle: Rectangle):
    """Classify a rectangle by `get_size()`: below SMALL -> SMALL, below LARGE -> MEDIUM, else LARGE."""
    size = get_size(rectangle)
    if size < Size.SMALL.value:
        return Size.SMALL
    if size < Size.LARGE.value:
        return Size.MEDIUM
    return Size.LARGE
def get_size(rectangle: Rectangle):
    """Return the square root of the rectangle's area (the side length of an equal-area square)."""
    return sqrt(area(rectangle))
def get_random_color_complementing_color_map(colormap):
    """Return the RGB complement of the color that `colormap` yields at position 0.2.

    Args:
        colormap: Callable mapping a float to a sequence whose first three entries are the
            red, green and blue components in the range 0..1.

    Returns:
        Tuple `(r, g, b)` of integers in the range 0..255.
    """
    base_rgb = [int(255 * component) for component in colormap(0.2)[:3]]
    # Complement as in https://stackoverflow.com/a/40234924: subtract every channel from the
    # sum of the smallest and the largest channel.
    low_plus_high = min(base_rgb) + max(base_rgb)
    return tuple(low_plus_high - channel for channel in base_rgb)
@lru_cache(maxsize=None)
def get_random_background_color():
    """Return an RGBA background color: the colormap's complement plus a random alpha in 100..210.

    The function is cached without arguments, so the color is computed on the first call only.
    """
    rgb = get_random_color_complementing_color_map(pick_colormap())
    alpha = rnd.randint(100, 210)
    return (*rgb, alpha)
class RecursiveRandomTable(RandomContentRectangle):
    """Table with a random grid whose cells hold text, plots or further (nested) tables."""

    def __init__(self, x1, y1, x2, y2, border_width=1, layout: str = None, double_rule=False):
        """A table with a random number of rows and columns, and random content in each cell.

        Args:
            x1: x-coordinate of the top-left corner
            y1: y-coordinate of the top-left corner
            x2: x-coordinate of the bottom-right corner
            y2: y-coordinate of the bottom-right corner
            border_width: width of the table border; only used to derive the double rule width
            layout: layout of the table, either "horizontal", "vertical", "closed", or "open";
                picked at random when None
            double_rule: whether to use double rules as the top and bottom rules
        """
        assert layout in [None, "horizontal", "vertical", "closed", "open"]

        super().__init__(x1, y1, x2, y2)

        self.double_rule = double_rule
        # Vertical space reserved at the top and at the bottom for the double rule.
        self.double_rule_width = (3 * border_width) if self.double_rule else 0

        # At most one column per 100 units of width; rows use a random minimum height of 17..100.
        self.n_columns = rnd.randint(1, max(self.width // 100, 1))
        self.n_rows = rnd.randint(1, max((self.height - 2 * self.double_rule_width) // rnd.randint(17, 100), 1))
        self.cell_size = (self.width / self.n_columns, (self.height - 2 * self.double_rule_width) / self.n_rows)

        self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))
        self.background_color = get_random_background_color()
        logger.info(f"Background color: {self.background_color}")

        self.layout = layout or self.pick_random_layout()
        logger.debug(f"Layout: {self.layout}")

    def pick_random_layout(self):
        """Pick a layout that is compatible with the number of rows and columns."""
        if self.n_columns == 1 and self.n_rows == 1:
            layout = "closed"
        elif self.n_columns == 1:
            layout = rnd.choice(["vertical", "closed"])
        elif self.n_rows == 1:
            layout = rnd.choice(["horizontal", "closed"])
        else:
            layout = rnd.choice(["closed", "horizontal", "vertical", "open"])
        return layout

    def generate_random_table(self):
        """Generate all cells, fill them, draw the borders and paste the result into `self.content`."""
        cells = self.generate_table()
        cells = list(self.fill_cells_with_content(cells))
        # FIXME: There is a bug here: Table rule is not drawn correctly, actually we want to do cells = ...
        # The generator is exhausted for its drawing side effects; the yielded items are discarded.
        list(self.draw_cell_borders(cells))
        self.content = paste_contents(self.content, cells)
        assert self.content.mode == "RGBA"

    def fill_cells_with_content(self, cells):
        """Lazily build content for every cell."""
        yield from map(self.build_cell, cells)

    def build_cell(self, cell):
        """Fill a cell according to its size and return the resulting content rectangle.

        Raises:
            ValueError: If the cell matches none of the size predicates.
        """
        if self.__is_a_small_cell(cell):
            cell = self.build_small_cell(cell)
        elif self.__is_a_medium_sized_cell(cell):
            cell = self.build_medium_sized_cell(cell)
        elif self.__is_a_large_cell(cell):
            cell = self.build_large_cell(cell)
        else:
            raise ValueError(f"Invalid cell size: {get_size(cell)}")
        assert cell.content.mode == "RGBA"
        return cell

    def __is_a_small_cell(self, cell):
        return get_size(cell) <= Size.SMALL.value

    def __is_a_medium_sized_cell(self, cell):
        # Only consulted after the small check failed, so this effectively means SMALL < size <= MEDIUM.
        return get_size(cell) <= Size.MEDIUM.value

    def __is_a_large_cell(self, cell):
        return get_size(cell) > Size.MEDIUM.value

    def build_small_cell(self, cell):
        """Fill a small cell with one to three random words or with a number and an optional unit."""
        # All randomness comes from the seeded module RNG `rnd` so that output follows the logged seed.
        content = (possibly() and " ".join(generate_random_words(1, 3))) or (
            str(round(generate_random_number(), rnd.choice([0, 1, 2, 3])))
            + ((possibly() and " " + rnd.choice(["$", "£", "%", "EUR", "USD", "CAD", "ADA"])) or "")
        )
        return generate_text_block(cell, content)

    def build_medium_sized_cell(self, cell):
        """Fill a medium-sized cell with either a plot or a nested table without double rules."""
        choice = rnd.choice(["plot", "recurse"])

        if choice == "plot":
            return generate_random_plot(cell)
        elif choice == "recurse":
            return generate_recursive_random_table(
                cell,
                border_width=1,
                layout=rnd.choice(["open", "horizontal", "vertical"]),
                double_rule=False,
            )
        else:
            # Not reachable with the two choices above; only hit if further choices are added.
            return generate_text_block(cell, f"{choice} {get_size(cell):.0f} {get_size_class(cell).name}")

    def build_large_cell(self, cell):
        """Fill a large cell with a plot (square-like cells only) or otherwise with a nested table."""
        choice = rnd.choice(["plot", "recurse"])

        logger.debug(f"Generating {choice} {get_size(cell):.0f} {get_size_class(cell).name}")

        if choice == "plot" and is_square_like(cell):
            return generate_random_plot(cell)
        else:
            logger.debug(f"recurse {get_size(cell):.0f} {get_size_class(cell).name}")
            return generate_recursive_random_table(
                cell,
                border_width=1,
                layout=rnd.choice(["open", "horizontal", "vertical"]),
                double_rule=False,
            )

    def draw_cell_borders(self, cells: List[ContentRectangle]):
        """Draw inner cell borders, the outer frame ("closed" layout) and the double rules.

        This is a generator: nothing is drawn until it is iterated. `cells` is expected in
        column-major order, as produced by `generate_table`.
        """

        def draw_edges_based_on_position(cell: Cell, col_idx, row_index):
            # Draw the borders of the cell based on its position in the table
            if col_idx < self.n_columns - 1:
                cell.draw_right_border()
            if row_index < self.n_rows - 1:
                cell.draw_bottom_border()

        columns = chunks(self.n_rows, cells)
        for col_idx, column in enumerate(columns):
            for row_index, cell in enumerate(column):
                # TODO: Refactor
                # The helper Cell is pointed at the cell's own image, so drawing on it
                # modifies `cell.content`.
                c = Cell(*cell.coords, self.background_color)
                c.content = cell.content
                draw_edges_based_on_position(c, col_idx, row_index)
                yield cell

        if self.layout == "closed":
            # TODO: Refactor
            c = Cell(*self.coords, self.background_color)
            c.content = self.content
            c.draw()
            yield self

        # TODO: Refactor
        if self.double_rule:
            # Outer pair of rules at the table's top and bottom edge ...
            c1 = Cell(*self.coords)
            c1.draw_top_border(width=1)
            c1.draw_bottom_border(width=1)

            # ... and an inner pair, offset by the double rule width.
            x1, y1, x2, y2 = self.coords
            c2 = Cell(x1, y1 + self.double_rule_width, x2, y2 - self.double_rule_width)
            c2.draw_top_border(width=1)
            c2.draw_bottom_border(width=1)

            c = superimpose_texture_with_transparency(c1.content, c2.content)
            self.content = superimpose_texture_with_transparency(c, self.content)
            yield self

    def generate_table(self) -> Iterable[ContentRectangle]:
        """Lazily generate all cells, column by column."""
        yield from mapcat(self.generate_column, range(self.n_columns))

    def generate_column(self, column_index) -> Iterable[ContentRectangle]:
        """Lazily generate the cells of one column, top to bottom."""
        logger.trace(f"Generating column {column_index}.")
        generate_cell_for_row_index = partial(self.generate_cell, column_index)
        yield from map(generate_cell_for_row_index, range(self.n_rows))

    def generate_cell(self, column_index, row_index) -> ContentRectangle:
        """Create the empty cell at the given grid position, in table-local coordinates."""
        w, h = self.cell_size
        # Rows start below the space reserved for the top double rule.
        x1, y1 = (column_index * w), (row_index * h) + self.double_rule_width
        x2, y2 = x1 + w, y1 + h
        logger.trace(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
        return Cell(x1, y1, x2, y2, self.background_color)

    def generate_column_names(self):
        """Return `n_columns` random column names as produced by funcy's `repeatedly`."""
        column_names = repeatedly(self.generate_column_name, self.n_columns)
        return column_names

    def generate_column_name(self):
        """Return one to three random words."""
        column_name = generate_random_words(1, 3)
        return column_name
class Cell(ContentRectangle):
    """Rectangle with an RGBA image as content that can be filled and framed with border lines.

    All drawing methods return `self` so that calls can be chained.
    """

    def __init__(self, x1, y1, x2, y2, color=None):
        """Create the cell and fill it with its background color.

        Args:
            x1, y1, x2, y2: Cell coordinates, forwarded to ContentRectangle.
            color: Background color; defaults to fully transparent white.
        """
        super().__init__(x1, y1, x2, y2)

        self.background_color = color or (255, 255, 255, 0)

        # to debug use random border color: tuple([random.randint(100, 200) for _ in range(3)] + [255])
        self.cell_border_color = (0, 0, 0, 255)

        self.border_width = 1
        # Offset that keeps the right and the bottom border inside the image.
        self.inset = 1

        self.content = Image.new("RGBA", (self.width, self.height))
        self.fill()

    def draw_top_border(self, width=None):
        self.draw_line((0, 0, self.width - self.inset, 0), width=width)
        return self

    def draw_bottom_border(self, width=None):
        self.draw_line((0, self.height - self.inset, self.width - self.inset, self.height - self.inset), width=width)
        return self

    def draw_left_border(self, width=None):
        self.draw_line((0, 0, 0, self.height), width=width)
        return self

    def draw_right_border(self, width=None):
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, self.height), width=width)
        return self

    def draw_line(self, points, width=None):
        """Draw a line in the border color onto the cell's content image.

        Args:
            points: Line coordinates `(x1, y1, x2, y2)` in cell-local pixels.
            width: Line width; a falsy value selects `self.border_width`.
        """
        width = width or self.border_width
        draw = ImageDraw.Draw(self.content)
        draw.line(points, width=width, fill=self.cell_border_color)
        return self

    def draw(self, width=None):
        """Draw all four borders."""
        self.draw_top_border(width=width)
        self.draw_bottom_border(width=width)
        self.draw_left_border(width=width)
        self.draw_right_border(width=width)
        return self

    # The corner methods draw a degenerate line (start == end) at the respective corner. Each of
    # them used to issue the identical draw call twice; the repetition was redundant and is gone.

    def draw_top_left_corner(self, width=None):
        self.draw_line((0, 0, 0, 0), width=width)
        return self

    def draw_top_right_corner(self, width=None):
        right = self.width - self.inset
        self.draw_line((right, 0, right, 0), width=width)
        return self

    def draw_bottom_left_corner(self, width=None):
        bottom = self.height - self.inset
        self.draw_line((0, bottom, 0, bottom), width=width)
        return self

    def draw_bottom_right_corner(self, width=None):
        right = self.width - self.inset
        bottom = self.height - self.inset
        self.draw_line((right, bottom, right, bottom), width=width)
        return self

    def fill(self, color=None):
        """Put a background of `color` (default: the cell's background color) behind the content."""
        color = color or self.background_color
        image = Image.new("RGBA", (self.width, self.height), color=color)
        # The current content is pasted on top of the freshly created background.
        self.content = superimpose_texture_with_transparency(image, self.content)
        return self
@lru_cache(maxsize=None)
def _get_faker():
    """Return a shared Faker instance; building one per call is needlessly expensive."""
    return Faker()


def generate_random_words(n_min, n_max):
    """Return a list of random words.

    Args:
        n_min: Minimum number of words (inclusive).
        n_max: Maximum number of words (inclusive).

    Returns:
        List of between `n_min` and `n_max` words; the count is drawn from `rnd`.
    """
    n_words = rnd.randint(n_min, n_max)
    return _get_faker().words(n_words)
def generate_random_number():
    """Return either a random integer in [-10000, 10000] or a random float in [-100, 100].

    All draws come from the seeded module RNG `rnd` instead of the global `random` module, so
    the result is reproducible for a given seed like the rest of the generated content.
    """
    integer_candidate = rnd.randint(-10000, 10000)
    float_candidate = rnd.uniform(-100, 100)
    return rnd.choice([integer_candidate, float_candidate])
def shrink_rectangle(rectangle: Rectangle, factor: float) -> Rectangle:
    """Return a copy of `rectangle` that is scaled to `1 - factor` of its size around its center.

    A ContentRectangle input yields a ContentRectangle that carries the same content.
    """
    scale = 1 - factor
    x1, y1, x2, y2 = compute_scaled_coordinates(rectangle, scale)
    logger.trace(f"Shrinking {rectangle} by {factor} to ({x1}, {y1}, {x2}, {y2}).")

    # Sanity checks: the result must lie within the original rectangle.
    assert x1 >= rectangle.x1
    assert y1 >= rectangle.y1
    assert x2 <= rectangle.x2
    assert y2 <= rectangle.y2

    shrunk = Rectangle(x1, y1, x2, y2)
    if not isinstance(rectangle, ContentRectangle):  # TODO: Refactor
        return shrunk
    return ContentRectangle(*shrunk.coords, rectangle.content)
def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int, int, int, int]:
    """Compute the coordinates of `rectangle` scaled by `factor` around its center.

    The scaled width and height are truncated to integers and the scaled rectangle is centered
    within the original one. The computation is plain arithmetic; no images are allocated.

    Args:
        rectangle: Object exposing `x1`, `y1`, `width` and `height`.
        factor: Scale factor applied to both width and height.

    Returns:
        Tuple `(x1, y1, x2, y2)` of the scaled rectangle.
    """
    scaled_width = int(rectangle.width * factor)
    scaled_height = int(rectangle.height * factor)

    # Same centering rule as compute_pasting_coordinates: half of the absolute size difference.
    x1 = rectangle.x1 + abs(rectangle.width - scaled_width) // 2
    y1 = rectangle.y1 + abs(rectangle.height - scaled_height) // 2
    x2, y2 = x1 + scaled_width, y1 + scaled_height
    return x1, y1, x2, y2
def dump_plt_to_image(rectangle):
    """Render the current matplotlib figure into a PIL image sized like `rectangle`.

    The figure is saved as a transparent PNG into an in-memory buffer, read back, and resized
    to `(rectangle.width, rectangle.height)`. The current figure is closed afterwards, even if
    saving or decoding fails, so figures do not pile up.

    Returns:
        The resized image. `resize` returns a new, fully loaded image, so it stays valid after
        the buffer has been closed.
    """
    try:
        with io.BytesIO() as buf:
            plt.savefig(buf, format="png", transparent=True)
            buf.seek(0)
            image = Image.open(buf).resize((rectangle.width, rectangle.height))
    finally:
        plt.close()
    return image
class RandomFontPicker:
    """Picks random TrueType fonts, optionally filtered by substrings of the font file name."""

    def __init__(self, font_dir=None, return_default_font=False):
        """Collect the candidate fonts.

        Args:
            font_dir: Directory that is searched for fonts, forwarded to `get_fonts`.
            return_default_font: If True, every pick returns PIL's default font.
        """
        fonts = get_fonts(font_dir)
        fonts_lower = [font.lower() for font in fonts]
        domestic_fonts_mask = lmap(complement(self.looks_foreign), fonts_lower)
        # `fonts` and `fonts_lower` are kept index-aligned; filters run on the lowercase names.
        self.fonts = list(itertools.compress(fonts, domestic_fonts_mask))
        self.fonts_lower = list(itertools.compress(fonts_lower, domestic_fonts_mask))

        self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
        self.draw = ImageDraw.Draw(self.test_image)
        self.return_default_font = return_default_font

    def looks_foreign(self, font):
        # This filters out foreign fonts (e.g. 'Noto Serif Malayalam')
        return len(font.split("-")[0]) > 10

    def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont:  # FIXME: Slow!
        """Return the first loadable font, in random order, that passes the name filters.

        Args:
            includes: Substrings of which at least one must occur in the font name (case-insensitive).
            excludes: Substrings of which none may occur in the font name (case-insensitive).

        Returns:
            The loaded font, or PIL's default font when `return_default_font` is set or no
            matching font could be loaded.
        """
        if self.return_default_font:
            return ImageFont.load_default()

        includes = [i.lower() for i in includes] if includes else []
        excludes = [i.lower() for i in excludes] if excludes else []

        logger.debug(f"Picking font by includes={includes} and excludes={excludes}.")

        def includes_pattern(font):
            return not includes or any(include in font for include in includes)

        def excludes_pattern(font):
            return not excludes or not any(exclude in font for exclude in excludes)

        self.shuffle_fonts()

        mask = lmap(lambda f: includes_pattern(f) and excludes_pattern(f), self.fonts_lower)
        fonts = itertools.compress(self.fonts, mask)
        # load_font returns None for fonts that cannot be loaded; keep() drops those.
        fonts = keep(map(self.load_font, fonts))
        # fonts = filter(self.font_is_renderable, fonts)  # FIXME: this does not work

        font = first(fonts)
        if font is None:
            # Previously this crashed with an AttributeError on None; fall back to the default font.
            logger.warning(f"No font found for includes={includes} and excludes={excludes}; using default font.")
            return ImageFont.load_default()

        logger.info(f"Using font: {font.getname()}")
        return font

    def shuffle_fonts(self):
        """Shuffle the fonts, keeping `fonts` and `fonts_lower` aligned."""
        if not self.fonts:
            # Nothing to shuffle; unzipping an empty list would fail to unpack.
            return
        l = lzip(self.fonts, self.fonts_lower)
        rnd.shuffle(l)
        self.fonts, self.fonts_lower = lzip(*l)

    def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
        """Pick a font whose name contains "mono" but not "oblique"."""
        return self.pick_random_font_available_on_system(includes=["mono"], excludes=["oblique"])

    # NOTE(review): lru_cache on a method keys on `self` and keeps the instance alive for the
    # lifetime of the cache.
    @lru_cache(maxsize=None)
    def load_font(self, font: str):
        """Load `font` at size 11; returns None if it cannot be loaded."""
        logger.trace(f"Loading font: {font}")
        try:
            return ImageFont.truetype(font, size=11)
        except OSError:
            return None

    @lru_cache(maxsize=None)
    def font_is_renderable(self, font):
        """Return True if a test string rendered with `font` has a non-zero width and height."""
        # NOTE(review): `textsize` is not available in newer Pillow releases - confirm the
        # pinned version before re-enabling the filter that uses this method.
        text_size = self.draw.textsize("Test String", font=font)
        return text_size[0] > 0 and text_size[1] > 0
def get_fonts(path: Path = None) -> List[str]:
    """Return the file names (without directories) of all *.ttf files found recursively below `path`.

    Args:
        path: Directory to search; defaults to /usr/share/fonts.
    """
    search_root = path or Path("/usr/share/fonts")
    return [font_file.name for font_file in search_root.rglob("*.ttf")]
@lru_cache(maxsize=None)
def get_font_picker(**kwargs):
    """Return a RandomFontPicker for the given constructor arguments, cached per argument set.

    The picker is always created with `return_default_font=True`, so its picks resolve to PIL's
    default font.
    """
    font_picker = RandomFontPicker(**kwargs, return_default_font=True)
    return font_picker
@lru_cache(maxsize=None)
def pick_random_mono_space_font_available_on_system(**kwargs):
    """Pick a monospace font via the cached font picker; the pick is cached per argument set.

    "includes" and "excludes" entries in `kwargs` are dropped before the picker is created.
    """
    picker_kwargs = omit(kwargs, ["includes", "excludes"])
    return get_font_picker(**picker_kwargs).pick_random_mono_space_font_available_on_system()
@lru_cache(maxsize=None)
def pick_random_font_available_on_system(**kwargs):
    """Pick a font via the cached font picker; the pick is cached per argument set.

    Args:
        **kwargs: "includes" and "excludes" (hashable sequences of substrings, e.g. tuples) are
            forwarded as name filters; all other entries go to the font picker's constructor.
            "Kinnari" and "KacstOne" are always added to the excludes.

    Returns:
        The picked font.
    """
    # Default to an empty tuple: without a default, a missing "excludes" yields None and
    # unpacking it raises a TypeError.
    kwargs["excludes"] = (
        *kwargs.get("excludes", ()),
        "Kinnari",
        "KacstOne",
    )
    font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
    return font_picker.pick_random_font_available_on_system(**project(kwargs, ["includes", "excludes"]))
@lru_cache(maxsize=None)
def pick_colormap() -> ListedColormap:
    """Pick one of five matplotlib colormaps at random.

    The function is cached without arguments, so the choice is made on the first call only.
    """
    candidate_names = [
        "viridis",
        "plasma",
        "inferno",
        "magma",
        "cividis",
    ]
    return plt.get_cmap(rnd.choice(candidate_names))
class RandomPlot(RandomContentRectangle):
    """Content rectangle that renders a random matplotlib plot as its content.

    Structural choices (plot type, styling, options) are drawn from the seeded module RNG
    `rnd`. The plotted data itself comes from `np.random`, which this class does not seed.
    """

    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)
        self.cmap = pick_colormap()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_plot(self, rectangle: Rectangle):
        """Render a random plot whose type suits the aspect ratio of `rectangle`."""
        if is_square_like(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_bar_plot,
                    self.generate_random_scatter_plot,
                    self.generate_random_histogram,
                    self.generate_random_pie_chart,
                ]
            )
        elif is_wide(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_histogram,
                    self.generate_random_bar_plot,
                ]
            )
        elif is_tall(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_bar_plot,
                    self.generate_random_histogram,
                ]
            )
        else:
            plt_fn = self.generate_random_scatter_plot

        plt_fn(rectangle)

    def generate_random_bar_plot(self, rectangle: Rectangle):
        x = sorted(np.random.randint(low=1, high=11, size=5))
        y = np.random.randint(low=1, high=11, size=5)
        bar_fn = partial(
            plt.bar,
            log=rnd.choice([True, False]),
        )
        self.__generate_random_plot(bar_fn, rectangle, x, y)

    def generate_random_line_plot(self, rectangle: Rectangle):
        f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])
        x = np.linspace(0, 10, 100)
        y = f(x)
        plot_fn = partial(
            plt.plot,
        )
        self.__generate_random_plot(plot_fn, rectangle, x, y)

    def generate_random_scatter_plot(self, rectangle: Rectangle):
        n = rnd.randint(10, 40)
        x = np.random.normal(size=n)
        y = np.random.normal(size=n)
        scatter_fn = partial(
            plt.scatter,
            cmap=self.cmap,
            marker=rnd.choice(["o", "*", "+", "x"]),
        )
        self.__generate_random_plot(scatter_fn, rectangle, x, y)

    def generate_random_histogram(self, rectangle: Rectangle):
        x = np.random.normal(size=100)
        hist_fn = partial(
            plt.hist,
            orientation=rnd.choice(["horizontal", "vertical"]),
            histtype=rnd.choice(["bar", "barstacked", "step", "stepfilled"]),
            log=rnd.choice([True, False]),
            stacked=rnd.choice([True, False]),
            density=rnd.choice([True, False]),
            cumulative=rnd.choice([True, False]),
        )
        # The second positional argument of plt.hist is the number of bins.
        self.__generate_random_plot(hist_fn, rectangle, x, rnd.randint(5, 20))

    def generate_random_pie_chart(self, rectangle: Rectangle):
        n = rnd.randint(3, 7)
        x = np.random.uniform(size=n)
        pie_fn = partial(
            plt.pie,
            shadow=True,
            startangle=90,
            pctdistance=0.85,
            labeldistance=1.1,
            colors=self.cmap(np.linspace(0, 1, 10)),
        )
        # The second positional argument of plt.pie is the per-wedge explode offset. The
        # keyword filter ["a"] matches no generated key, so no styling kwargs are passed.
        self.__generate_random_plot(
            pie_fn,
            rectangle,
            x,
            np.random.uniform(0, 0.1, size=n),
            plot_kwargs=self.generate_plot_kwargs(keywords=["a"]),
        )

    def generate_plot_kwargs(self, keywords=None):
        """Return random styling kwargs, restricted to `keywords` when given."""
        kwargs = {
            "color": rnd.choice(self.cmap.colors),
            "linestyle": rnd.choice(["-", "--", "-.", ":"]),
            "linewidth": rnd.uniform(1, 4),
        }

        return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords}

    def __generate_random_plot(self, plot_fn, rectangle: Rectangle, x, y, plot_kwargs=None):
        """Draw the plot, randomly strip decorations, and store the rendering as content.

        Args:
            plot_fn: Plotting callable invoked as `plot_fn(x, y, **plot_kwargs)`.
            rectangle: Determines figure size (in units of 100 per inch) and output image size.
            x: First positional argument for `plot_fn`.
            y: Second positional argument for `plot_fn`.
            plot_kwargs: Styling kwargs; random ones are generated when None.
        """
        plot_kwargs = self.generate_plot_kwargs() if plot_kwargs is None else plot_kwargs

        fig, ax = plt.subplots()
        fig.set_size_inches(rectangle.width / 100, rectangle.height / 100)
        fig.tight_layout(pad=0)

        plot_fn(x, y, **plot_kwargs)
        ax.set_facecolor("none")

        probably() and ax.set_title(" ".join(generate_random_words(1, 3)))

        # disable axes at random
        maybe() and ax.set_xticks([])
        maybe() and ax.set_yticks([])
        maybe() and ax.set_xticklabels([])
        maybe() and ax.set_yticklabels([])
        maybe() and ax.set_xlabel("")
        maybe() and ax.set_ylabel("")
        maybe() and ax.set_title("")
        maybe() and ax.set_frame_on(False)

        # remove spines at random
        maybe() and (ax.spines["top"].set_visible(False) or ax.spines["right"].set_visible(False))

        image = dump_plt_to_image(rectangle)
        assert image.mode == "RGBA"

        self.content = image if not self.content else superimpose_texture_with_transparency(self.content, image)
def maybe():
    """Return True when a draw from `rnd` exceeds 0.9 (roughly one call in ten)."""
    threshold = 0.9
    return rnd.random() > threshold
def possibly():
    """Return True when a draw from `rnd` exceeds 0.5 (roughly every second call)."""
    threshold = 0.5
    return rnd.random() > threshold
def probably():
    """Return True when a draw from `rnd` exceeds 0.4 (roughly six calls in ten)."""
    threshold = 0.4
    return rnd.random() > threshold
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    """Create a TextBlock covering `rectangle` and fill it with random paragraph text.

    Existing content is carried over when `rectangle` is a ContentRectangle.
    """
    coords = rectangle.coords
    body_font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif"),
        excludes=("bold", "mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, font=body_font, font_size=30)
    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None
    block.generate_random_text(rectangle, n_sentences)
    return block
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    """Generate a caption block that starts with a random figure number between 1 and 20."""
    caption_start = f"Fig {rnd.randint(1, 20)}"
    return generate_random_caption(rectangle, caption_start)
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    """Generate a caption block that starts with a random table number between 1 and 20."""
    caption_start = f"Tabl {rnd.randint(1, 20)}"
    return generate_random_caption(rectangle, caption_start)
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    """Create a TextBlock covering `rectangle` and fill it with random caption text.

    Args:
        rectangle: Area of the caption; its content is carried over if it is a ContentRectangle.
        caption_start: Prefix of the caption's first line, e.g. a figure or table number.
        n_sentences: Forwarded to the text generator.
    """
    coords = rectangle.coords
    caption_generator = CaptionGenerator(caption_start=caption_start)
    caption_font = pick_random_font_available_on_system(
        includes=("italic",),
        excludes=("bold", "mono"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, text_generator=caption_generator, font=caption_font, font_size=100)
    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None
    block.generate_random_text(rectangle, n_sentences)
    return block
def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
    """Create a TextBlock covering `rectangle` that shows the given `text`.

    Existing content is carried over when `rectangle` is a ContentRectangle.
    """
    coords = rectangle.coords
    cell_font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif", "bold"),
        excludes=("mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, font=cell_font, font_size=30)
    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None
    block.put_text(text, rectangle)
    return block
def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
    """Render `lines` top-down onto a transparent RGBA image of the rectangle's size.

    Args:
        lines: Text lines; the rendered height of the first one is used as line spacing.
        rectangle: Determines the size of the image.
        font: Font to draw with; a monospace font is picked when omitted.
    """
    font = font or pick_random_mono_space_font_available_on_system()

    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
    draw = ImageDraw.Draw(image)
    line_height = draw.textsize(first(lines), font=font)[1]

    for line_number, line in enumerate(lines):
        draw.text((0, line_number * line_height), line, font=font, fill=(0, 0, 0, 255))

    return image
class LineFormatter(abc.ABC):
    """Base class of the line formatters; declares no methods of its own."""
class IdentityLineFormatter(LineFormatter):
    """Line formatter that hands its input back unchanged."""

    def __init__(self):
        """Nothing to configure."""

    def __call__(self, lines, last_full):
        unchanged = (lines, last_full)
        return unchanged
class ParagraphLineFormatter(LineFormatter):
    """Line formatter that randomly truncates lines.

    Truncated lines end in ".\n" (see `truncate_line`), which presumably produces paragraph
    breaks in the rendered text - verify against `generate_random_text_lines`.
    """

    def __init__(self, blank_line_percentage=None):
        # Probability with which a line that follows a full line gets truncated.
        # A falsy value (None or 0) is replaced by a random value in [0, 0.5].
        self.blank_line_percentage = blank_line_percentage or rnd.uniform(0, 0.5)

    def __call__(self, lines, last_full):
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        """Format the first line of `lines` and move it to the back.

        Args:
            lines: Lines to process; the first one is the current line.
            last_full: Whether the previously processed line was a full line. A line is only
                truncated if its predecessor was full.

        Returns:
            Tuple of the rearranged lines and a flag telling whether the current line was
            kept as a full line.
        """

        def truncate_current_line():
            return rnd.random() < self.blank_line_percentage and last_full

        # This is meant to be read from the bottom up.
        current_line_shall_not_be_a_full_line = truncate_current_line()
        line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
        format_current_line = compose(line_formatter, first)
        # star/rconj are project helpers; presumably this appends the second element of the
        # pair to the first one - confirm in cv_analysis.utils.
        move_current_line_to_back = star(rconj)
        # juxt evaluates rest(lines) first and the formatted first line second.
        split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
        split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
            split_first_line_from_lines_and_format_the_former,
            move_current_line_to_back,
        )
        current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
        # Start reading here and move up.
        return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line

    def format_line(self, line, full=True):
        """Return `(line, full)`, with the line truncated first if `full` is False."""
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        """Shorten `line` to a few words and end it with a period and a newline.

        Up to `n_trailing_words` (random, 0..4) words directly preceding the last word are
        kept, and periods within them are removed. If no words remain, the empty string is
        returned without the ".\n" suffix.
        """
        n_trailing_words = rnd.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line
class TextBlockGenerator(abc.ABC):
    """Base class of the text generators; declares no methods of its own."""
class ParagraphGenerator(TextBlockGenerator):
    """Generates random text lines that are formatted by a ParagraphLineFormatter."""

    def __init__(self):
        truncation_probability = rnd.uniform(0, 0.5)
        self.line_formatter = ParagraphLineFormatter(blank_line_percentage=truncation_probability)

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return the text lines generated for `rectangle`."""
        return generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
class CaptionGenerator(TextBlockGenerator):
    """Generates unformatted random text lines whose first line carries a caption prefix."""

    def __init__(self, caption_start=None):
        self.line_formatter = IdentityLineFormatter()
        # A random figure number is only drawn when no (truthy) caption start is supplied.
        self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return the text lines for `rectangle`, with the caption start prepended to the first line."""
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        first_line_modified = f"{self.caption_start}.: {first(lines)}"
        return conj(first_line_modified, rest(lines))
class TextBlock(ContentRectangle):
    """A content rectangle whose `content` image is filled with rendered text."""

    def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
        """Create the block.

        Args:
            x1, y1, x2, y2: Coordinates forwarded to the parent constructor.
            text_generator: Callable `(rectangle, n_sentences) -> lines`; defaults to a
                fresh `ParagraphGenerator`.
            font: Font used for rendering; defaults to PIL's built-in default font.
            font_size: Accepted but not used by the current implementation.
        """
        super().__init__(x1, y1, x2, y2)
        if not font:
            font = ImageFont.load_default()  # pick_random_font_available_on_system(size=font_size)
        self.font = font
        if not text_generator:
            text_generator = ParagraphGenerator()
        self.text_generator = text_generator

    def __call__(self, *args, **kwargs):
        """Do nothing; all arguments are ignored."""
        return None

    def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
        """Render generated text lines for `rectangle` and add them to this block's content.

        Returns:
            This block, so that calls can be chained.
        """
        rendered = write_lines_to_image(self.text_generator(rectangle, n_sentences), rectangle, self.font)
        return self.__put_content(rendered)

    def put_text(self, text: str, rectangle: Rectangle):
        """Draw `text` in black onto a transparent image and add it to this block's content.

        Returns:
            This block, so that calls can be chained.
        """
        # NOTE(review): `getsize` is deprecated in newer Pillow releases - confirm the pinned version.
        text_width, text_height = self.font.getsize(text)
        too_wide = text_width > rectangle.width
        too_tall = text_height > rectangle.height

        canvas = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))
        if too_wide or too_tall:
            # NOTE(review): the still empty canvas is resized before the text is drawn - confirm intent.
            canvas = canvas.resize((int(rectangle.width * 0.9), text_height))

        ImageDraw.Draw(canvas).text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
        return self.__put_content(canvas)

    def __put_content(self, image: Image.Image):
        """Set `image` as content, or superimpose it on the content that already exists."""
        if self.content:
            image = superimpose_texture_with_transparency(self.content, image)
        self.content = image
        assert self.content.mode == "RGBA"
        return self
class RandomPageNumber(TextBlock):
    """A text block that holds a randomly chosen page number and its placement on the block."""

    def __init__(self, x1, y1, x2, y2):
        """Pick a page number in [1, 1000] and derive margins and location from the block size.

        Args:
            x1, y1, x2, y2: Coordinates forwarded to the parent constructor.
        """
        super().__init__(x1, y1, x2, y2)
        # Draw from the module's seeded generator `rnd` rather than the global `random`
        # module, so that a logged seed reproduces the page number as well.
        self.page_number = rnd.randint(1, 1000)
        self.margin_distance_percentage = 0.05
        self.margin_distance_x = int(self.width * self.margin_distance_percentage)
        self.margin_distance_y = int(self.height * self.margin_distance_percentage)
        # NOTE(review): `pick_location` and `location_to_coordinates` are not defined in this
        # class body; presumably they are provided elsewhere - TODO confirm.
        self.location_coordinates = self.location_to_coordinates(self.pick_location())
def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
    """Generate random text, wrap it into lines and run every line through `line_formatter`.

    Args:
        rectangle: Its `width` is passed to `textwrap.wrap` as the maximum line width.
        line_formatter: Called through `star` with the pair `(lines, last_full)`. It is expected
            to return `(lines, current_line_is_full)` with the first line formatted and moved
            to the back of `lines`.
            NOTE(review): the default `identity` does not look compatible with that protocol - confirm.
        n_sentences: Number of sentences requested from Faker.

    Returns:
        The lines after the formatter has been applied once per line. Provided the formatter
        follows the protocol above, the lines are back in their original order. If the text
        wraps to no lines at all, the empty list is returned.
    """
    text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
    unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)

    format_next_line = star(line_formatter)

    # Each step formats the current first line and moves it to the back, so exactly one step per
    # line is needed. The previous `take(n, iterate(f, x))` applied `f` only n - 1 times, because
    # the first element of `iterate` is the untouched seed `x`: the last line was never formatted
    # and ended up in front. It also failed on unpacking when there were no lines at all.
    state = (unformatted_lines, True)
    for _ in range(len(unformatted_lines)):
        state = format_next_line(state)

    formatted_lines, _ = state
    return formatted_lines
def paste_content(page, content_box: ContentRectangle):
    """Paste the RGBA content of `content_box` onto `page` at the box's top-left corner.

    The content image also serves as the paste mask. `page` is modified in place
    and returned.
    """
    content = content_box.content
    assert content.mode == "RGBA"
    top_left = (content_box.x1, content_box.y1)
    page.paste(content, top_left, content)
    return page
def paste_contents(page, contents: Iterable[ContentRectangle]):
    """Return a deep copy of `page` with every content box pasted onto it; `page` itself is untouched."""
    canvas = deepcopy(page)
    for content_box in contents:
        paste_content(canvas, content_box)
    return canvas
# TODO: produce boxes for page numbers, headers and footers
class PagePartitioner(abc.ABC):
    """Base class for strategies that split the area inside the page margins into content boxes."""

    def __init__(self):
        # Outer margins as fractions of the page width (left/right) and height (top/bottom).
        self.left_margin_percentage = self.right_margin_percentage = 0.05
        self.top_margin_percentage = self.bottom_margin_percentage = 0.1
        # Margin factor handed to `get_child_boxes` on every split.
        self.recursive_margin_percentage = 0.007
        self.max_recursion_depth = 3
        # Probability of recursing at depth 0 and its relative reduction per depth level.
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Partition the area of `page` inside the margins and return the flattened boxes."""
        width, height = page.width, page.height
        x1 = int(width * self.left_margin_percentage)
        y1 = int(height * self.top_margin_percentage)
        x2 = width - int(width * self.right_margin_percentage)
        y2 = height - int(height * self.bottom_margin_percentage)
        area_within_margins = Rectangle(x1, y1, x2, y2)
        return lflatten(self.generate_content_boxes(area_within_margins))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield the content boxes (possibly nested) for `box`; to be implemented by subclasses."""
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split `box` in two along `axis` ("x" or "y") at `split_percentage` of its extent."""
        assert axis in ["x", "y"]
        if axis == "x":
            anchor, extent = box.x1, box.width
        else:
            anchor, extent = box.y1, box.height
        split_coordinate = split_percentage * extent + anchor
        return get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Decide randomly whether to recurse at `depth`, based on `recursion_probability`."""
        draw = rnd.random()
        return draw <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        """Return the initial probability multiplied by `(1 - decay)` raised to `depth`."""
        retained_fraction = 1 - self.recursion_probability_decay
        return self.initial_recursion_probability * retained_fraction**depth
class RandomPagePartitioner(PagePartitioner):
    """Partitioner that splits along a random axis at a random position on every level."""

    def __init__(self):
        super().__init__()

    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield `box` at maximum depth; otherwise yield its children or generators for their subdivisions."""
        if depth >= self.max_recursion_depth:
            yield box
            return

        # Axis and split position are drawn before the decision whether to recurse.
        axis = rnd.choice(["x", "y"])
        split_percentage = rnd.uniform(0.3, 0.7)
        child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)

        if not self.recurse(depth):
            yield child_boxes
            return

        # Nested generators are yielded as they are; the caller is responsible for flattening.
        for child in child_boxes:
            yield self.generate_content_boxes(child, depth + 1)
class TwoColumnPagePartitioner(PagePartitioner):
    """Partitioner that first splits into two equal columns and then splits each column vertically."""

    def __init__(self):
        super().__init__()
        self.max_recursion_depth = 3

    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield `box` at maximum depth; otherwise yield generators for the subdivisions of its children."""
        if depth >= self.max_recursion_depth:
            yield box
            return

        if depth == 0:
            # Top level: split in the middle along the x axis.
            axis, split_percentage = "x", 0.5
        else:
            # Deeper levels: split along the y axis at 30% or 70% of the height.
            axis, split_percentage = "y", rnd.choice([0.3, 0.7])

        child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)
        # Nested generators are yielded as they are; the caller is responsible for flattening.
        for child in child_boxes:
            yield self.generate_content_boxes(child, depth + 1)
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Split `box` at `split_coordinate` along `axis` and return the two resulting rectangles.

    The margin is applied by scaling the coordinate values themselves: lower edges are
    multiplied by `1 + margin_percentage`, upper edges by `1 - margin_percentage`.

    Args:
        box: The rectangle to split.
        split_coordinate: Absolute coordinate of the split on the chosen axis.
        axis: "x" for a left/right split, "y" for a top/bottom split.
        margin_percentage: Scaling fraction applied to every edge coordinate.

    Returns:
        The pair (left, right) for axis "x" or (top, bottom) for axis "y".
    """
    assert axis in ["x", "y"]

    scale_up = 1 + margin_percentage
    scale_down = 1 - margin_percentage

    x1, y1 = box.x1 * scale_up, box.y1 * scale_up
    x2, y2 = box.x2 * scale_down, box.y2 * scale_down
    split_as_lower_edge = split_coordinate * scale_up
    split_as_upper_edge = split_coordinate * scale_down

    if axis == "x":
        left = Rectangle(x1, y1, split_as_upper_edge, y2)
        right = Rectangle(split_as_lower_edge, y1, x2, y2)
        return left, right

    top = Rectangle(x1, y1, x2, split_as_upper_edge)
    bottom = Rectangle(x1, split_as_lower_edge, x2, y2)
    return top, bottom
def drop_small_boxes(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]:
    """Return the boxes that reach the minimum size in both dimensions.

    A box is dropped when its width is below `page_width * min_percentage` or its
    height is below `page_height * min_percentage`.
    """
    min_width, min_height = page_width * min_percentage, page_height * min_percentage
    return [box for box in boxes if not (box.width < min_width or box.height < min_height)]
def draw_boxes(page: Image, boxes: Iterable[Rectangle]):
    """Display `page` via `show_image` using its "pil" backend.

    `boxes` is currently unused, because the call that would draw the rectangles
    onto the page is commented out below.
    """
    # page = draw_rectangles(page, boxes, filled=False, annotate=True)
    show_image(page, backend="pil")
@pytest.fixture
def page_with_opaque_content(
    blank_page,
    tinted_blank_page,
    texture,
    texture_fn,
) -> Tuple[np.ndarray, Iterable[Rectangle]]:
    """Create a textured page with generated content pasted directly onto the texture.

    Returns:
        The page with content and the content boxes that were pasted onto it.
    """
    partitioner_candidates = [
        TwoColumnPagePartitioner(),
        # RandomPagePartitioner(),
    ]
    page_partitioner = rnd.choice(partitioner_candidates)

    # The texture is randomly flipped and post-processed, then serves as the page background.
    background = random_flip(texture)
    background = texture_fn(background)

    empty_boxes = page_partitioner(background)
    content_generator = ContentGenerator()
    content_boxes = content_generator(empty_boxes)

    page = paste_contents(background, content_boxes)
    return page, content_boxes
@pytest.fixture
def page_with_translucent_content(
    blank_page, tinted_blank_page, texture, texture_fn
) -> Tuple[np.ndarray, List[Rectangle]]:
    """Create a page by pasting content onto a blank page and multiply-blending it with the texture.

    Returns:
        The blended page as a uint8 array and the content boxes that were pasted.
    """
    partitioner_candidates = [
        TwoColumnPagePartitioner(),
        # RandomPagePartitioner(),
    ]
    page_partitioner = rnd.choice(partitioner_candidates)

    empty_boxes = page_partitioner(blank_page)
    content_generator = ContentGenerator()
    content_boxes = content_generator(empty_boxes)
    page_content = paste_contents(blank_page, content_boxes)

    background = random_flip(texture)
    background = texture_fn(background)

    # Both layers are made fully opaque before blending.
    background.putalpha(255)
    page_content.putalpha(255)

    contrast_factor = 1.2
    background = ImageEnhance.Contrast(background).enhance(contrast_factor)

    content_layer = to_array(page_content)
    texture_layer = to_array(background)
    blended = blend_modes.multiply(content_layer, texture_layer, opacity=1)

    return blended.astype(np.uint8), content_boxes
@pytest.fixture
def page_with_content(
    page_with_translucent_content,
    # page_with_opaque_content,
) -> np.ndarray:
    """Provide the page of the selected content fixture after handing it to `draw_boxes`."""
    rendered_page, content_boxes = page_with_translucent_content
    # rendered_page, content_boxes = page_with_opaque_content
    draw_boxes(rendered_page, content_boxes)
    return rendered_page