Cells now draw only inner borders and the table draws the outer border if the layout is "closed". This avoids multiple lines around cells of nested tables, since nested tables are now created with the layout parameter set to "open", in which case the table does not draw its borders.
1420 lines
47 KiB
Python
1420 lines
47 KiB
Python
import abc
|
|
import io
|
|
import itertools
|
|
import random
|
|
import string
|
|
import textwrap
|
|
from enum import Enum
|
|
from functools import lru_cache, partial
|
|
from math import sqrt
|
|
from pathlib import Path
|
|
from typing import Tuple, Union, Iterable, List
|
|
|
|
import albumentations as A
|
|
import cv2 as cv
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pytest
|
|
from PIL import Image, ImageOps, ImageFont, ImageDraw
|
|
from PIL.Image import Transpose
|
|
from faker import Faker
|
|
from loguru import logger
|
|
from matplotlib import pyplot as plt
|
|
from tabulate import tabulate
|
|
|
|
from cv_analysis.table_parsing import isolate_vertical_and_horizontal_components
|
|
from cv_analysis.utils import star, rconj, conj
|
|
from cv_analysis.utils.common import normalize_to_gray_scale
|
|
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
|
|
from cv_analysis.utils.merging import merge_related_rectangles
|
|
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
|
from cv_analysis.utils.spacial import area
|
|
|
|
# A fresh seed is drawn first and then replaced by a pinned value, so the
# module-wide generator below is always seeded with the pinned value.
# NOTE(review): the pinned seed looks like a debugging aid — confirm before removing.
random_seed = random.randint(0, 2**32 - 1)
random_seed = 3896311122

# Shared random number generator used by the helpers in this module.
rnd = random.Random(random_seed)
logger.info(f"Random seed: {random_seed}")
|
|
|
|
#
|
|
# transform = A.Compose(
|
|
# [
|
|
# # geometric transforms
|
|
# A.HorizontalFlip(p=0.2),
|
|
# A.RandomRotate90(p=0.2),
|
|
# A.VerticalFlip(p=0.2),
|
|
# # brightness and contrast transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.RandomGamma(p=0.5),
|
|
# A.RandomBrightnessContrast(p=0.5),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# # noise transforms
|
|
# A.SomeOf(
|
|
# [
|
|
# A.Emboss(p=0.05),
|
|
# A.ImageCompression(p=0.05),
|
|
# A.PixelDropout(p=0.05),
|
|
# ],
|
|
# p=0.5,
|
|
# n=2,
|
|
# ),
|
|
# # color transforms
|
|
# A.SomeOf(
|
|
# [
|
|
# A.ColorJitter(p=1),
|
|
# A.RGBShift(p=1, r_shift_limit=0.1, g_shift_limit=0.1, b_shift_limit=0.1),
|
|
# A.ChannelShuffle(p=1),
|
|
# ],
|
|
# p=0.5,
|
|
# n=3, # 3 => all
|
|
# ),
|
|
# # blurring and sharpening transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.GaussianBlur(p=0.05),
|
|
# A.MotionBlur(p=0.05, blur_limit=21),
|
|
# A.Sharpen(p=0.05),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# # environmental transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.RandomRain(p=0.2, rain_type="drizzle"),
|
|
# A.RandomFog(p=0.2, fog_coef_upper=0.4),
|
|
# A.RandomSnow(p=0.2),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# ],
|
|
# p=0.5,
|
|
# )
|
|
from funcy import (
|
|
juxt,
|
|
compose,
|
|
identity,
|
|
lflatten,
|
|
lmap,
|
|
first,
|
|
iterate,
|
|
take,
|
|
last,
|
|
rest,
|
|
rcompose,
|
|
lsplit,
|
|
lfilter,
|
|
lzip,
|
|
keep,
|
|
repeatedly,
|
|
mapcat,
|
|
omit,
|
|
project,
|
|
complement,
|
|
lremove,
|
|
chunks,
|
|
)
|
|
|
|
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
|
|
|
|
# transform = A.Compose(
|
|
# [
|
|
# # brightness and contrast transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.RandomGamma(p=0.2),
|
|
# A.RandomBrightnessContrast(p=0.2, brightness_limit=0.05, contrast_limit=0.05),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# # color transforms
|
|
# A.SomeOf(
|
|
# [
|
|
# A.ColorJitter(p=1),
|
|
# A.RGBShift(p=1, r_shift_limit=0.3, g_shift_limit=0.3, b_shift_limit=0.3),
|
|
# A.ChannelShuffle(p=1),
|
|
# ],
|
|
# p=1.0,
|
|
# n=3, # 3 => all
|
|
# ),
|
|
# # # blurring and sharpening transforms
|
|
# # A.OneOf(
|
|
# # [
|
|
# # A.GaussianBlur(p=0.05),
|
|
# # A.MotionBlur(p=0.05, blur_limit=21),
|
|
# # A.Sharpen(p=0.05),
|
|
# # ],
|
|
# # p=0.0,
|
|
# # ),
|
|
# ]
|
|
# )
|
|
from cv_analysis.utils.display import show_image
|
|
from cv_analysis.utils.rectangle import Rectangle
|
|
|
|
# Augmentation pipeline; no transform is currently enabled.
# (Previously considered: A.ColorJitter(p=1).)
transform = A.Compose([])
|
|
|
|
|
|
# Type alias for a colour given as a triple of ints.
Color = Tuple[int, int, int]
|
|
|
|
|
|
@pytest.fixture(params=["rough_grain"])  # disabled alternatives: "plain", "digital", "crumpled"
def base_texture(request, size):
    """Open the parametrized texture image (`<param>.jpg` in TEST_PAGE_TEXTURES_DIR) and resize it to `size`."""
    texture_path = TEST_PAGE_TEXTURES_DIR / (request.param + ".jpg")
    return Image.open(texture_path).resize(size)
|
|
|
|
|
|
@pytest.fixture(params=["portrait"])  # disabled alternative: "landscape"
def orientation(request):
    """Return the page orientation selected by the fixture parametrization."""
    selected_orientation = request.param
    return selected_orientation
|
|
|
|
|
|
@pytest.fixture(params=[100])  # disabled alternative: 30
def dpi(request):
    """Return the resolution value selected by the fixture parametrization."""
    selected_dpi = request.param
    return selected_dpi
|
|
|
|
|
|
# Disabled alternatives: "sepia", "gray", "white", "light_red", "light_blue"
@pytest.fixture(params=["brown"])
def color_name(request):
    """Return the name of the tint colour selected by the fixture parametrization."""
    selected_name = request.param
    return selected_name
|
|
|
|
|
|
@pytest.fixture(params=["neutral"])  # disabled alternatives: "smooth", "coarse"
def texture_name(request):
    """Return the name of the texture post-processing selected by the fixture parametrization."""
    selected_name = request.param
    return selected_name
|
|
|
|
|
|
@pytest.fixture(params=[30])  # disabled alternatives: 70, 150
def color_intensity(request):
    """Return the intensity value selected by the fixture parametrization (used as an alpha value by `texture`)."""
    selected_intensity = request.param
    return selected_intensity
|
|
|
|
|
|
def random_flip(image):
    """Flip the image left-right and/or top-bottom, each decided by an independent draw from `rnd`.

    The left-right decision is drawn first, then the top-bottom one.
    """
    for flip in (Transpose.FLIP_LEFT_RIGHT, Transpose.FLIP_TOP_BOTTOM):
        if rnd.choice([True, False]):
            image = image.transpose(flip)
    return image
|
|
|
|
|
|
@pytest.fixture
def color(color_name):
    """Translate a colour name into its hex colour string; unknown names raise KeyError."""
    palette = {
        "brown": "#7d6c5b",
        "sepia": "#b8af88",
        "gray": "#9c9c9c",
        "white": "#ffffff",
        "light_red": "#d68c8b",
        "light_blue": "#8bd6d6",
    }
    return palette[color_name]
|
|
|
|
|
|
@pytest.fixture
def texture_fn(texture_name, size):
    """Return the image function for the given texture name, wrapped by `normalize_image_function`.

    "smooth" maps to `blur`, "coarse" to an overlay of the blurred and the
    sharpened image, and any other name to the identity function.
    """
    named_functions = {
        "smooth": blur,
        "coarse": compose(overlay, juxt(blur, sharpen)),
    }
    selected = named_functions.get(texture_name, identity)
    return normalize_image_function(selected)
|
|
|
|
|
|
def blur(image: np.ndarray):
    """Blur the image with OpenCV's `blur` using a 3x3 kernel."""
    kernel_size = (3, 3)
    return cv.blur(image, kernel_size)
|
|
|
|
|
|
def normalize_image_function(func):
    """Wrap `func` so that its input is first converted with `normalize_image_format_to_array`
    and its output is converted with `normalize_image_format_to_pil`."""

    def inner(image):
        as_array = normalize_image_format_to_array(image)
        processed = func(as_array)
        return normalize_image_format_to_pil(processed)

    return inner
|
|
|
|
|
|
def sharpen(image: np.ndarray):
    """Filter the image with a 3x3 kernel that has 6 in the centre and -1 everywhere else."""
    kernel = np.array([[-1, -1, -1], [-1, 6, -1], [-1, -1, -1]])
    # ddepth=-1: the output keeps the depth of the input image.
    return cv.filter2D(image, -1, kernel)
|
|
|
|
|
|
def overlay(images, mode=np.sum):
    """Combine equally shaped images into a single uint8 image.

    The images are stacked and reduced along the stacking axis with `mode`
    (`np.sum` or `np.max`); the result is then rescaled so that its maximum
    becomes 255.

    Args:
        images: Iterable of arrays with identical shapes.
        mode: Reduction to apply, either `np.sum` or `np.max`.

    Returns:
        The combined image as a `np.uint8` array.
    """
    assert mode in [np.sum, np.max]
    stacked = np.stack(list(images))
    combined = mode(stacked, axis=0)

    peak = combined.max()
    if peak == 0:
        # Nothing to rescale; dividing by zero would fill the result with NaN.
        return np.zeros(combined.shape, dtype=np.uint8)

    return (combined / peak * 255).astype(np.uint8)
|
|
|
|
|
|
@pytest.fixture
def texture(base_texture, color, color_intensity):
    """Tint the base texture by superimposing a uniformly coloured layer whose alpha is `color_intensity`."""
    tint_layer = Image.new("RGBA", base_texture.size, color)
    tint_layer.putalpha(color_intensity)
    return superimpose_texture_with_transparency(base_texture, tint_layer)
|
|
|
|
|
|
def tint_image(src, color="#FFFFFF"):
    """Colorize the grayscale version of `src` from black to `color`, keeping the alpha band of `src`.

    `src` must split into exactly four bands; the fourth is used as alpha.
    """
    src.load()
    _red, _green, _blue, alpha_band = src.split()
    tinted = ImageOps.colorize(ImageOps.grayscale(src), (0, 0, 0), color)
    tinted.putalpha(alpha_band)
    return tinted
|
|
|
|
|
|
def color_shift_array(image: np.ndarray, color: Color):
    """Scale the three channels of an image by weights derived from `color`.

    The weight of each channel is its share of the colour's component sum,
    divided by 10. The input must be a 3-dimensional array with three
    channels on the last axis and values of at most 255; `color` must be a
    tuple with components of at most 255.

    Returns:
        A `np.uint8` array with the same shape as `image`.
    """
    assert image.ndim == 3
    assert image.shape[-1] == 3
    assert isinstance(color, tuple)
    assert max(color) <= 255
    assert image.max() <= 255

    components = np.array(color)
    weights = components / components.sum() / 10
    assert max(weights) <= 1

    # The weights broadcast over the last (channel) axis.
    shifted = (image * weights).astype(np.uint8)
    assert shifted.shape == image.shape
    return shifted
|
|
|
|
|
|
@pytest.fixture
def size(dpi, orientation):
    """Return the page size as an int tuple: (8.5 * dpi, 11 * dpi) for portrait, swapped for landscape.

    Raises:
        ValueError: If the orientation is neither "portrait" nor "landscape".
    """
    short_side, long_side = 8.5 * dpi, 11 * dpi
    sizes_by_orientation = {
        "portrait": (short_side, long_side),
        "landscape": (long_side, short_side),
    }
    if orientation not in sizes_by_orientation:
        raise ValueError(f"Unknown orientation: {orientation}")
    first, second = sizes_by_orientation[orientation]
    return int(first), int(second)
|
|
|
|
|
|
def superimpose_texture_with_transparency(page: Image.Image, texture: Image.Image, autocrop=True) -> Image.Image:
    """Paste an RGBA image onto a page image, using the pasted image itself as the mask.

    With `autocrop`, the texture is first cropped to its bounding box. If the
    sizes then differ, the texture is padded to the page size. The page is
    modified in place and also returned.
    """
    layer = texture.crop(texture.getbbox()) if autocrop else texture

    if page.size != layer.size:
        logger.trace(f"Padding image before pasting to fit size {page.size}")
        layer = pad_image_to_size(layer, page.size)

    assert page.size == layer.size
    assert layer.mode == "RGBA"

    # Third argument is the mask: the layer's own alpha decides what is pasted.
    page.paste(layer, (0, 0), layer)
    return page
|
|
|
|
|
|
def pad_image_to_size(image: Image.Image, size: Tuple[int, int]) -> Image.Image:
    """Centre an RGBA image on a new canvas of the given size.

    Returns the image unchanged when it already has the target size.

    Raises:
        ValueError: If the image exceeds the target size in either dimension.
    """
    if image.size == size:
        return image

    image_width, image_height = image.size[0], image.size[1]
    if image_width > size[0] or image_height > size[1]:
        raise ValueError(f"Image size {image.size} is larger than target size {size}.")

    canvas = Image.new(image.mode, size, color=255)
    offset = compute_pasting_coordinates(image, canvas)
    assert image.mode == "RGBA"
    canvas.paste(image, offset)
    return canvas
|
|
|
|
|
|
def compute_pasting_coordinates(smaller: "Image.Image", larger: "Image.Image"):
    """Return the (x, y) offset at which `smaller` sits centred on `larger`.

    Each offset is half the absolute size difference, rounded down.
    """
    horizontal_slack = abs(larger.width - smaller.width)
    vertical_slack = abs(larger.height - smaller.height)
    return horizontal_slack // 2, vertical_slack // 2
|
|
|
|
|
|
@pytest.fixture
def blank_page(texture, texture_fn) -> np.ndarray:
    """Build a page image from the texture, fill it with generated content and return it as an array.

    The texture is randomly flipped and post-processed, partitioned into
    boxes, the boxes are filled by a `ContentGenerator`, and the contents are
    pasted onto the page before the boxes are drawn.
    """
    page = texture_fn(random_flip(texture))

    # Only one partitioner is enabled; RandomPagePartitioner is currently disabled.
    partition_page = rnd.choice([TwoColumnPagePartitioner()])
    empty_boxes = partition_page(page)

    filled_boxes = ContentGenerator()(empty_boxes)
    page = paste_contents(page, filled_boxes)
    draw_boxes(page, filled_boxes)

    return np.array(page)
|
|
|
|
|
|
class ContentRectangle(Rectangle):
    """Rectangle that additionally carries a `content` attribute (None by default)."""

    def __init__(self, x1, y1, x2, y2, content=None):
        super().__init__(x1, y1, x2, y2)
        self.content = content

    def __repr__(self):
        class_name = self.__class__.__name__
        return f"{class_name}({self.x1}, {self.y1}, {self.x2}, {self.y2}, content={self.content})"
|
|
|
|
|
|
class ContentGenerator:
    """Fills a list of page boxes with generated text blocks, tables and plots."""

    def __init__(self):
        # When set, boxes are merged before and de-duplicated after content generation.
        self.constrain_layouts = True

    def __call__(self, boxes: List[Rectangle]) -> List[Rectangle]:
        """Return content boxes generated for the given boxes.

        Square-like boxes alternate between tables and plots (each with a
        caption); the remaining boxes alternate between text blocks and
        tables with captions.

        Args:
            boxes: Boxes to fill. The list is copied before shuffling, so the
                caller's list is left untouched.

        Returns:
            A list of generated content boxes.
        """
        boxes = list(boxes)
        rnd.shuffle(boxes)

        figure_boxes, text_boxes = lsplit(is_square_like, boxes)

        if self.constrain_layouts:
            figure_boxes = merge_related_rectangles(figure_boxes)
            figure_boxes = lfilter(is_square_like, figure_boxes)
            text_boxes = merge_related_rectangles(text_boxes)

        # Evaluation order matters for the shared RNG: the three zipmap calls
        # are consumed eagerly in this order, while the text blocks are only
        # generated lazily once the chain is materialised below.
        text_blocks = map(generate_random_text_block, every_nth(2, text_boxes))
        tables_in_text_boxes = list(zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])))
        tables_in_figure_boxes = list(zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)))
        plots = list(zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])))

        boxes = list(itertools.chain(text_blocks, *tables_in_text_boxes, *tables_in_figure_boxes, *plots))

        if self.constrain_layouts:
            boxes = remove_included(boxes)
            boxes = remove_overlapping(boxes)

        return boxes
|
|
|
|
|
|
def zipmap(fn, boxes, n=2):
    """Apply `fn` to every box and yield the results regrouped by position.

    `fn` is expected to return a tuple per box; the i-th yielded list holds
    the i-th element of every result. If fewer than `n` lists come out (for
    example because `boxes` is empty), `n` empty lists are yielded instead.
    """
    regrouped = [list(group) for group in zip(*map(fn, boxes))]
    if len(regrouped) < n:
        yield from ([] for _ in range(n))
    else:
        yield from regrouped
|
|
|
|
|
|
def is_square_like(box: Rectangle):
    """Return True if neither side of the box is more than twice as long as the other.

    Boxes with a zero width or height are never square-like (and no longer
    raise ZeroDivisionError).
    """
    if not box.width or not box.height:
        return False
    return box.width / box.height > 0.5 and box.height / box.width > 0.5
|
|
|
|
|
|
def every_nth(n, iterable):
    """Return a lazy iterator over every n-th item of `iterable`, starting with the first."""
    step = n
    return itertools.islice(iterable, 0, None, step)
|
|
|
|
|
|
def generate_random_plot_with_caption(rectangle: Rectangle):
    """Split the rectangle into a figure part and a caption part and fill them with a plot and an image caption.

    Returns:
        The tuple (plot box, caption box).
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    plot = generate_random_plot(figure_area)
    caption = generate_random_image_caption(caption_area)
    return plot, caption
|
|
|
|
|
|
def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    """Split the rectangle into a figure part and a caption part and fill them with a table and a table caption.

    Returns:
        The tuple (table box, caption box).
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    table = generate_recursive_random_table(figure_area)
    caption = generate_random_table_caption(caption_area)
    return table, caption
|
|
|
|
|
|
def split_into_figure_and_caption(rectangle: Rectangle):
    """Split a rectangle horizontally into an upper figure box and a lower caption box.

    The split point lies between 50% and 90% of the height, and a gap of up
    to 3% of the height is left between the two boxes; both are drawn from
    `rnd` (gap first, then split point).
    """
    gap_percentage = rnd.uniform(0, 0.03)
    split_point = rnd.uniform(0.5, 0.9)

    figure_bottom = rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
    caption_top = rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2)

    figure_box = Rectangle(rectangle.x1, rectangle.y1, rectangle.x2, figure_bottom)
    caption_box = Rectangle(rectangle.x1, caption_top, rectangle.x2, rectangle.y2)
    return figure_box, caption_box
|
|
|
|
|
|
def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
    """Create a `RandomPlot` with the coordinates of `rectangle` and render a random plot into it.

    If `rectangle` is a `ContentRectangle`, its content is carried over as the
    initial content of the plot block.
    """
    block = RandomPlot(*rectangle.coords)
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    # Logged instead of printed to keep test output clean.
    logger.trace(f"Initial plot content: {block.content}")
    block.generate_random_plot(rectangle)
    return block
|
|
|
|
|
|
def generate_random_table(rectangle: Rectangle) -> ContentRectangle:
    """Create a `RandomTable` with the coordinates of `rectangle` and render a random table into it.

    Content is carried over from `rectangle` when it is a `ContentRectangle`
    or `RandomContentRectangle`.
    """
    table = RandomTable(*rectangle.coords)
    # TODO: Refactor
    if isinstance(rectangle, (ContentRectangle, RandomContentRectangle)):
        table.content = rectangle.content
    else:
        table.content = None
    table.generate_random_table(rectangle)
    return table
|
|
|
|
|
|
def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRectangle:
    """Create a `RecursiveRandomTable` with the coordinates of `rectangle` and generate its cells.

    Extra keyword arguments are forwarded to the `RecursiveRandomTable`
    constructor. If `rectangle` is itself a `RecursiveRandomTable`, its
    content is carried over (falsy content becomes None).
    """
    table = RecursiveRandomTable(*rectangle.coords, **kwargs)
    if isinstance(rectangle, RecursiveRandomTable):
        # TODO: Refactor
        table.content = rectangle.content or None
    table.generate_random_table()
    return table
|
|
|
|
|
|
@lru_cache(maxsize=None)
def get_random_seed():
    """Return a seed drawn from `rnd`.

    Because the function takes no arguments and is cached, the value is drawn
    once and every later call returns that same value.
    NOTE(review): presumably intended as a process-wide seed — confirm.
    """
    upper_bound = 2**32 - 1
    return rnd.randint(0, upper_bound)
|
|
|
|
|
|
class RandomContentRectangle(ContentRectangle):
    """Content rectangle that owns a random number generator seeded per instance."""

    def __init__(self, x1, y1, x2, y2, content=None, seed=None):
        """Initialise the rectangle and its generator.

        Args:
            x1, y1, x2, y2: Rectangle coordinates, passed to `ContentRectangle`.
            content: Initial content.
            seed: Seed for `self.random`. When None, `get_random_seed()` is
                used. An explicit seed of 0 is honoured.
        """
        super().__init__(x1, y1, x2, y2, content)
        self.seed = seed if seed is not None else get_random_seed()
        self.random = random.Random(self.seed)
|
|
|
|
|
|
class Size(Enum):
    """Size thresholds that are compared against the value returned by `get_size`."""

    SMALL = 100
    MEDIUM = 180
    LARGE = 300
|
|
|
|
|
|
def get_size_class(rectangle: Rectangle):
    """Classify a rectangle by its size as returned by `get_size`.

    Sizes below `Size.SMALL` are SMALL, sizes below `Size.LARGE` are MEDIUM,
    everything else is LARGE.
    NOTE(review): the `Size.MEDIUM` threshold is not used here — confirm this is intended.
    """
    size = get_size(rectangle)
    if size < Size.SMALL.value:
        return Size.SMALL
    if size < Size.LARGE.value:
        return Size.MEDIUM
    return Size.LARGE
|
|
|
|
|
|
def get_size(rectangle: Rectangle):
    """Return the square root of the rectangle's area, as computed by `area`."""
    return sqrt(area(rectangle))
|
|
|
|
|
|
class RecursiveRandomTable(RandomContentRectangle):
    """Table whose cells are filled with short text, plots or further nested tables."""

    def __init__(self, x1, y1, x2, y2, seed=None, border_width=1, layout=None):
        """Pick a random grid for the given area and prepare an empty, transparent table image.

        Args:
            x1, y1, x2, y2: Table coordinates.
            seed: Passed to `RandomContentRectangle`.
            border_width: Accepted for callers; not used by this constructor.
            layout: Border layout; falls back to "closed" when not given.
        """
        super().__init__(x1, y1, x2, y2, seed=seed)

        # The order of the draws from `rnd` is significant for reproducibility:
        # column count, row height divisor, row count, then the four colour values.
        max_columns = max(self.width // 100, 1)
        self.n_columns = rnd.randint(1, max_columns)
        row_divisor = rnd.randint(17, 100)
        max_rows = max(self.height // row_divisor, 1)
        self.n_rows = rnd.randint(1, max_rows)
        self.cell_size = (self.width // self.n_columns, self.height // self.n_rows)

        transparent_white = (255, 255, 255, 0)
        self.content = Image.new("RGBA", (self.width, self.height), transparent_white)
        self.background_color = tuple(rnd.randint(100, 200) for _ in range(4))
        self.cell_border_color = (0, 0, 0, 255)

        # TODO: Refactor layout selection. A random choice between "closed",
        # "horizontal", "vertical" and "open" (restricted for single-row or
        # single-column tables) is planned; for now the default is fixed.
        default_layout = "closed"  # TODO: Remove this fixed default
        self.layout = layout or default_layout
        logger.debug(f"Layout: {self.layout}")

        self.cells = None

    def generate_random_table(self):
        """Generate the cells, fill them, draw their borders and paste them onto the table image."""
        empty_cells = self.generate_table()
        cells = list(self.fill_cells_with_content(empty_cells))
        self.cells = list(self.draw_cell_borders(cells))

        self.content = paste_contents(self.content, cells)
        assert self.content.mode == "RGBA"

    def fill_cells_with_content(self, cells):
        """Yield one filled content block per cell; every block must carry RGBA content."""
        for cell in cells:
            filled = self._generate_cell_content(cell)
            assert filled.content.mode == "RGBA"
            yield filled

    def _generate_cell_content(self, cell):
        """Choose the kind of content for a cell based on the size of its shrunk inner region."""
        inner_region = shrink_rectangle(cell, 0.11)

        # Drawn unconditionally so the RNG sequence does not depend on the cell size;
        # only used as a label for cells larger than Size.LARGE.
        choice = rnd.choice(["text", "plot", "recurse", "plain_table", "blank"])
        size = get_size(inner_region)

        if size <= Size.SMALL.value:
            words = generate_random_words(1, 3)
            return generate_text_block(cell, " ".join(words))

        if size <= Size.MEDIUM.value:
            choice = rnd.choice(["plot", "recurse"])
            if choice == "plot":
                return generate_random_plot(cell)
            # The only other possible choice is "recurse".
            return generate_recursive_random_table(cell, border_width=1, layout="open")

        if size <= Size.LARGE.value:
            choice = rnd.choice(["plot", "recurse"])
            logger.debug(f"Generating {choice} {size:.0f} {get_size_class(cell).name}")
            if choice == "plot" and is_square_like(cell):
                return generate_random_plot(cell)
            logger.debug(f"recurse {size:.0f} {get_size_class(cell).name}")
            return generate_recursive_random_table(cell, border_width=1, layout="open")

        return generate_text_block(cell, f"{choice} {size:.0f} {get_size_class(cell).name}")

    def draw_cell_borders(self, cells: List[ContentRectangle]):
        """Draw the inner grid lines onto the cell contents and yield the cells unchanged in order.

        Cells are expected column by column. A right border is drawn for every
        cell outside the last column and a bottom border for every cell outside
        the last row. For the "closed" layout the outer border is drawn onto the
        table image itself.
        """
        if self.layout == "closed":
            frame = Cell(*self.coords, self.background_color)
            frame.content = self.content  # draw directly onto the table image
            frame.draw()

        last_column = self.n_columns - 1
        last_row = self.n_rows - 1

        for col_idx, column in enumerate(chunks(self.n_rows, cells)):
            for row_index, cell in enumerate(column):
                # TODO: Refactor
                # The helper cell shares the content image, so drawing on it marks the original cell.
                painter = Cell(*cell.coords, self.background_color)
                painter.content = cell.content
                if col_idx < last_column:
                    painter.draw_right_border()
                if row_index < last_row:
                    painter.draw_bottom_border()
                yield cell

    def generate_table(self) -> Iterable[ContentRectangle]:
        """Yield the cells of all columns, column by column."""
        for column_index in range(self.n_columns):
            yield from self.generate_column(column_index)

    def generate_column(self, column_index) -> Iterable[ContentRectangle]:
        """Yield the cells of one column from the first row to the last."""
        logger.trace(f"Generating column {column_index}.")
        for row_index in range(self.n_rows):
            yield self.generate_cell(column_index, row_index)

    def generate_cell(self, column_index, row_index) -> ContentRectangle:
        """Create the cell at the given grid position, in coordinates relative to the table."""
        cell_width, cell_height = self.cell_size
        x1 = column_index * cell_width
        y1 = row_index * cell_height
        x2 = x1 + cell_width
        y2 = y1 + cell_height
        logger.trace(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
        return Cell(x1, y1, x2, y2, self.background_color)

    def generate_column_names(self):
        """Return an iterator producing one generated column name per column."""
        return repeatedly(self.generate_column_name, self.n_columns)

    def generate_column_name(self):
        """Return between one and three random words."""
        return generate_random_words(1, 3)
|
|
|
|
|
|
class Cell(ContentRectangle):
    """Table cell with an RGBA content image and helpers for drawing borders onto it.

    All drawing coordinates are relative to the cell's own content image.
    Every drawing method returns the cell itself.
    """

    def __init__(self, x1, y1, x2, y2, color):
        """Create the cell and fill its content image with `color`.

        Args:
            x1, y1, x2, y2: Cell coordinates.
            color: Fill colour; only its first four components are used.
        """
        super().__init__(x1, y1, x2, y2)
        self.cell_border_color = (0, 0, 0, 255)
        self.border_width = 1
        # Distance of the right/bottom lines from the image edge, so they stay inside the image.
        self.inset = 1

        # Logged instead of printed: one cell is created per table cell and per border pass.
        logger.trace(f"Cell background color: {color}")

        self.content = Image.new("RGBA", (self.width, self.height), color=color[:4])

    def draw_top_border(self, width=None):
        """Draw a line along the top edge."""
        self.draw_line((0, 0, self.width - self.inset, 0), width=width)
        return self

    def draw_bottom_border(self, width=None):
        """Draw a line along the bottom edge."""
        bottom = self.height - self.inset
        self.draw_line((0, bottom, self.width - self.inset, bottom), width=width)
        return self

    def draw_left_border(self, width=None):
        """Draw a line along the left edge."""
        self.draw_line((0, 0, 0, self.height - self.inset), width=width)
        return self

    def draw_right_border(self, width=None):
        """Draw a line along the right edge, starting `inset` pixels below the top."""
        right = self.width - self.inset
        self.draw_line((right, +self.inset, right, self.height - self.inset), width=width)
        return self

    def draw_line(self, points, width=None):
        """Draw a line in the border colour; `width` defaults to the cell's border width."""
        width = width or self.border_width
        draw = ImageDraw.Draw(self.content)
        draw.line(points, width=width, fill=self.cell_border_color)
        return self

    def draw(self, width=None):
        """Draw all four borders (top, bottom, left, right)."""
        self.draw_top_border(width=width)
        self.draw_bottom_border(width=width)
        self.draw_left_border(width=width)
        self.draw_right_border(width=width)
        return self

    def draw_top_left_corner(self, width=None):
        """Mark the top-left corner point."""
        self.draw_line((0, 0, 0, 0), width=width)
        return self

    def draw_top_right_corner(self, width=None):
        """Mark the top-right corner point."""
        right = self.width - self.inset
        self.draw_line((right, 0, right, 0), width=width)
        return self

    def draw_bottom_left_corner(self, width=None):
        """Mark the bottom-left corner point."""
        bottom = self.height - self.inset
        self.draw_line((0, bottom, 0, bottom), width=width)
        return self

    def draw_bottom_right_corner(self, width=None):
        """Mark the bottom-right corner point."""
        right = self.width - self.inset
        bottom = self.height - self.inset
        self.draw_line((right, bottom, right, bottom), width=width)
        return self
|
|
|
|
|
|
def generate_random_words(n_min, n_max):
    """Return a list of Faker words; the number of words is drawn from `rnd` between `n_min` and `n_max` inclusive."""
    word_count = rnd.randint(n_min, n_max)
    return Faker().words(word_count)
|
|
|
|
|
|
def shrink_rectangle(rectangle: Rectangle, factor: float) -> Rectangle:
    """Return a rectangle shrunk by `factor` and centred inside the original.

    The result is a `ContentRectangle` carrying the original content when the
    input is a `ContentRectangle`, otherwise a plain `Rectangle`.
    """
    x1, y1, x2, y2 = compute_scaled_coordinates(rectangle, factor)

    logger.trace(f"Shrinking {rectangle} by {factor} to ({x1}, {y1}, {x2}, {y2}).")

    # The shrunk rectangle must lie within the original one.
    assert x1 >= rectangle.x1
    assert y1 >= rectangle.y1
    assert x2 <= rectangle.x2
    assert y2 <= rectangle.y2

    plain = Rectangle(x1, y1, x2, y2)
    if not isinstance(rectangle, ContentRectangle):
        return plain

    # TODO: Refactor
    return ContentRectangle(*plain.coords, rectangle.content)
|
|
|
|
|
|
def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int, int, int, int]:
    """Compute the coordinates of `rectangle` shrunk by `factor` and centred in the original.

    The scaled width and height are the original ones multiplied by
    `1 - factor` and truncated to int. The scaled box is offset by half the
    size difference (rounded down) in each direction.

    Args:
        rectangle: Object providing `x1`, `y1`, `width` and `height`.
        factor: Fraction by which each side is reduced.

    Returns:
        The tuple (x1, y1, x2, y2) of the scaled box.
    """
    scaled_width = int(rectangle.width * (1 - factor))
    scaled_height = int(rectangle.height * (1 - factor))

    # Same centring rule as compute_pasting_coordinates, computed directly so
    # no throwaway images have to be allocated.
    x_offset = abs(rectangle.width - scaled_width) // 2
    y_offset = abs(rectangle.height - scaled_height) // 2

    x1 = rectangle.x1 + x_offset
    y1 = rectangle.y1 + y_offset
    return x1, y1, x1 + scaled_width, y1 + scaled_height
|
|
|
|
|
|
class RandomTable(RandomContentRectangle):
    """Content rectangle that renders a random ASCII table into its content image."""

    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)
        self.font = pick_random_mono_space_font_available_on_system(includes=("bold",), excludes=("italic", "oblique"))

    def generate_random_table(self, rectangle: Rectangle):
        """Generates the image of a random table.

        The table image is stored as `self.content`, or superimposed onto
        already existing content.

        Reference: https://stackoverflow.com/questions/35634238/how-to-save-a-pandas-dataframe-table-as-a-png
        """
        text_table = self.generate_random_ascii_table(rectangle)
        table_lines = text_table.split("\n")
        image = write_lines_to_image(table_lines, rectangle)
        self.join_lines(image)

        self.content = image if not self.content else superimpose_texture_with_transparency(self.content, image)

    def generate_random_dataframe(self, rectangle: Rectangle):
        """Generates a random dataframe that has as many rows and columns as to fit the given rectangle.

        Row and column counts are the rectangle's height and width divided by
        the height of a sample text in the default font; columns are capped at
        26 (one per uppercase letter). Values are random ints in [0, 100).
        """
        scratch = Image.new("RGB", (rectangle.width, rectangle.height), color="white")
        draw = ImageDraw.Draw(scratch)
        # textbbox replaces ImageDraw.textsize, which is no longer available in
        # current Pillow; the bottom coordinate matches the former text height.
        text_size = draw.textbbox((0, 0), "dummy", font=ImageFont.load_default())[3]

        rows = rectangle.height // text_size

        col_names = list(string.ascii_uppercase)
        cols = min(rectangle.width // text_size, len(col_names))

        return pd.DataFrame(
            np.random.randint(0, 100, size=(rows, cols)),
            columns=col_names[:cols],
        )

    def join_lines(self, table: Image.Image):
        """Compute the line grid of the table image.

        NOTE(review): the computed grid is currently discarded and nothing is
        returned — this looks unfinished; confirm the intent.
        """
        table = normalize_image_format_to_array(table)
        table = normalize_to_gray_scale(table)
        grid = isolate_vertical_and_horizontal_components(table)
        # grid = cv2.bitwise_not(grid)

    def generate_random_ascii_table(self, rectangle: Rectangle):
        """Render a random dataframe for the rectangle as text using `tabulate`."""
        df = self.generate_random_dataframe(rectangle)
        # Only "grid" is enabled; "simple", "presto", "psql" and "rst" are disabled.
        # The draw is kept so the shared RNG sequence stays unchanged.
        table_format = rnd.choice(["grid"])
        return tabulate(df, headers="keys", tablefmt=table_format)
|
|
|
|
|
|
def dump_plt_to_image(rectangle):
    """Render the current matplotlib figure to an image with the rectangle's width and height.

    The figure is saved as a transparent PNG into an in-memory buffer, loaded
    as an image and resized. The current figure is closed afterwards, even if
    saving or resizing fails.
    """
    try:
        with io.BytesIO() as buf:
            plt.savefig(buf, format="png", transparent=True)
            buf.seek(0)
            # resize() returns a new image, so the result does not depend on
            # the buffer once it has been closed.
            return Image.open(buf).resize((rectangle.width, rectangle.height))
    finally:
        plt.close()
|
|
|
|
|
|
class RandomFontPicker:
    """Picks fonts at random from the font files returned by `get_fonts`."""

    def __init__(self, font_dir=None, return_default_font=False):
        """Collect the candidate fonts.

        Args:
            font_dir: Directory passed to `get_fonts`.
            return_default_font: When True, `pick_random_font_available_on_system`
                always returns Pillow's default font.
        """
        fonts = get_fonts(font_dir)
        fonts_lower = [font.lower() for font in fonts]
        domestic_fonts_mask = lmap(complement(self.looks_foreign), fonts_lower)
        self.fonts = list(itertools.compress(fonts, domestic_fonts_mask))
        self.fonts_lower = list(itertools.compress(fonts_lower, domestic_fonts_mask))

        self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
        self.draw = ImageDraw.Draw(self.test_image)
        self.return_default_font = return_default_font

    def looks_foreign(self, font):
        """Heuristic: treat fonts whose name before the first "-" exceeds 10 characters as foreign."""
        # This filters out foreign fonts (e.g. 'Noto Serif Malayalam')
        return len(font.split("-")[0]) > 10

    def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont:  # FIXME: Slow!
        """Return the first loadable font, in shuffled order, that matches the filters.

        Args:
            includes: Substrings of which at least one must occur in the
                lower-cased font name (no restriction when empty).
            excludes: Substrings of which none may occur in the lower-cased
                font name.

        Returns:
            The loaded font. Pillow's default font is returned when
            `return_default_font` is set or when no candidate can be loaded.
        """
        if self.return_default_font:
            return ImageFont.load_default()

        includes = [i.lower() for i in includes] if includes else []
        excludes = [i.lower() for i in excludes] if excludes else []

        logger.debug(f"Picking font by includes={includes} and excludes={excludes}.")

        def includes_pattern(font):
            return not includes or any(include in font for include in includes)

        def excludes_pattern(font):
            return not excludes or not any(exclude in font for exclude in excludes)

        self.shuffle_fonts()

        mask = lmap(lambda f: includes_pattern(f) and excludes_pattern(f), self.fonts_lower)
        fonts = itertools.compress(self.fonts, mask)
        fonts = keep(map(self.load_font, fonts))
        # fonts = filter(self.font_is_renderable, fonts)  # FIXME: this does not work

        font = first(fonts)
        if font is None:
            # No candidate matched or could be loaded; fall back instead of failing on None.
            logger.warning(f"No font found for includes={includes} and excludes={excludes}; using default font.")
            return ImageFont.load_default()

        logger.info(f"Using font: {font.getname()}")
        return font

    def shuffle_fonts(self):
        """Shuffle the font names and their lower-cased counterparts in lockstep."""
        if not self.fonts:
            # Nothing to shuffle; unzipping an empty list would fail.
            return
        paired = lzip(self.fonts, self.fonts_lower)
        rnd.shuffle(paired)
        self.fonts, self.fonts_lower = lzip(*paired)

    def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
        """Pick a font whose name contains "mono" and does not contain "oblique"."""
        return self.pick_random_font_available_on_system(includes=["mono"], excludes=["oblique"])

    @lru_cache(maxsize=None)
    def load_font(self, font: str):
        """Load the font at size 11; returns None when Pillow cannot open it."""
        logger.trace(f"Loading font: {font}")
        try:
            return ImageFont.truetype(font, size=11)
        except OSError:
            return None

    @lru_cache(maxsize=None)
    def font_is_renderable(self, font):
        """Return True if a test string rendered with the font has a positive width and height."""
        # textbbox replaces ImageDraw.textsize, which is no longer available in current Pillow.
        _, _, text_width, text_height = self.draw.textbbox((0, 0), "Test String", font=font)
        return text_width > 0 and text_height > 0
|
|
|
|
|
|
def get_fonts(path: Path = None) -> List[str]:
    """Return the file names (without directories) of all `*.ttf` files found recursively below `path`.

    `path` defaults to `/usr/share/fonts`.
    """
    font_root = path or Path("/usr/share/fonts")
    return [font_file.name for font_file in font_root.rglob("*.ttf")]
|
|
|
|
|
|
@lru_cache(maxsize=None)
def get_font_picker(**kwargs):
    """Return a `RandomFontPicker` for the given keyword arguments, cached per argument set.

    The picker is always created with `return_default_font=True`.
    NOTE(review): this makes every pick return the default font — presumably a
    speed-up for tests; confirm.
    """
    picker = RandomFontPicker(return_default_font=True, **kwargs)
    return picker
|
|
|
|
|
|
@lru_cache(maxsize=None)
def pick_random_mono_space_font_available_on_system(**kwargs):
    """Pick a monospace font via the shared font picker, cached per argument set.

    The `includes` and `excludes` arguments are removed before the picker is
    looked up and are not forwarded to the pick itself.
    """
    picker_kwargs = omit(kwargs, ["includes", "excludes"])
    return get_font_picker(**picker_kwargs).pick_random_mono_space_font_available_on_system()
|
|
|
|
|
|
@lru_cache(maxsize=None)
def pick_random_font_available_on_system(**kwargs):
    """Pick a font via the shared font picker, cached per argument set.

    "Kinnari" and "KacstOne" are always appended to the `excludes` filter.
    `includes` and `excludes` are forwarded to the pick; all other keyword
    arguments select the font picker. Because of the cache, all argument
    values must be hashable (e.g. tuples rather than lists).
    """
    # `excludes` may be missing or None; unpacking None would raise a TypeError.
    requested_excludes = kwargs.get("excludes") or ()
    kwargs["excludes"] = (*requested_excludes, "Kinnari", "KacstOne")

    font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
    return font_picker.pick_random_font_available_on_system(**project(kwargs, ["includes", "excludes"]))
|
|
|
|
|
|
class RandomPlot(RandomContentRectangle):
    """Content rectangle that renders a randomly chosen matplotlib plot into its content image."""

    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)

        # The colormap is chosen with the instance's own generator.
        colormap_names = ["viridis", "plasma", "inferno", "magma", "cividis"]
        self.cmap = plt.get_cmap(self.random.choice(colormap_names))

    def __call__(self, *args, **kwargs):
        """Do nothing."""
        pass

    def generate_random_plot(self, rectangle: Rectangle):
        """Render one randomly chosen kind of plot for the rectangle."""
        plot_generators = [
            self.generate_random_line_plot,
            self.generate_random_bar_plot,
            self.generate_random_scatter_plot,
            self.generate_random_histogram,
            self.generate_random_pie_chart,
        ]
        generate = rnd.choice(plot_generators)
        # noinspection PyArgumentList
        generate(rectangle)

    def generate_random_bar_plot(self, rectangle: Rectangle):
        """Render a bar plot of five random values at five sorted random positions."""
        positions = sorted(np.random.randint(low=1, high=11, size=5))
        heights = np.random.randint(low=1, high=11, size=5)
        self.__generate_random_plot(plt.bar, rectangle, positions, heights)

    def generate_random_line_plot(self, rectangle: Rectangle):
        """Render a line plot of a randomly chosen numpy function over 100 points in [0, 10]."""
        function = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])
        xs = np.linspace(0, 10, 100)
        self.__generate_random_plot(plt.plot, rectangle, xs, function(xs))

    def generate_random_scatter_plot(self, rectangle: Rectangle):
        """Render a scatter plot of 100 normally distributed points."""
        xs = np.random.normal(size=100)
        ys = np.random.normal(size=100)
        self.__generate_random_plot(plt.scatter, rectangle, xs, ys)

    def generate_random_histogram(self, rectangle: Rectangle):
        """Render a histogram of 100 normally distributed values; 10 is passed as the second positional argument."""
        samples = np.random.normal(size=100)
        self.__generate_random_plot(plt.hist, rectangle, samples, 10)

    def generate_random_pie_chart(self, rectangle: Rectangle):
        """Render a pie chart of 10 uniformly distributed values."""
        shares = np.random.uniform(size=10)
        # Filtering by the keyword "a" leaves no styling keywords for the pie chart,
        # while still consuming the same random draws.
        pie_kwargs = self.generate_plot_kwargs(keywords=["a"])
        self.__generate_random_plot(plt.pie, rectangle, shares, None, plot_kwargs=pie_kwargs)

    def generate_plot_kwargs(self, keywords=None):
        """Return random styling keywords (color, linestyle, linewidth), optionally restricted to `keywords`."""
        styling = {
            "color": rnd.choice(self.cmap.colors),
            "linestyle": rnd.choice(["-", "--", "-.", ":"]),
            "linewidth": rnd.uniform(0.5, 2),
        }
        if not keywords:
            return styling
        return {key: value for key, value in styling.items() if key in keywords}

    def __generate_random_plot(self, plot_fn, rectangle: Rectangle, x, y, plot_kwargs=None):
        """Plot `x` and `y` with `plot_fn`, randomly strip decorations and store the rendered image.

        The image is stored as `self.content`, or superimposed onto already
        existing content.
        """
        if plot_kwargs is None:
            plot_kwargs = self.generate_plot_kwargs()

        fig, ax = plt.subplots()
        fig.set_size_inches(rectangle.width / 100, rectangle.height / 100)
        fig.tight_layout(pad=0)

        plot_fn(x, y, **plot_kwargs)
        ax.set_facecolor("none")

        if maybe():
            ax.set_title("Figure Title")

        # disable axes at random
        if maybe():
            ax.set_xticks([])
        if maybe():
            ax.set_yticks([])
        if maybe():
            ax.set_xticklabels([])
        if maybe():
            ax.set_yticklabels([])
        if maybe():
            ax.set_xlabel("")
        if maybe():
            ax.set_ylabel("")
        if maybe():
            ax.set_title("")
        if maybe():
            ax.set_frame_on(False)

        # remove spines at random
        if maybe():
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)

        rendered = dump_plt_to_image(rectangle)
        assert rendered.mode == "RGBA"

        if not self.content:
            self.content = rendered
        else:
            self.content = superimpose_texture_with_transparency(self.content, rendered)
|
|
|
|
|
|
def maybe():
    """Return True when a draw from the module-level ``rnd`` exceeds 0.9 (roughly one in ten)."""
    threshold = 0.9
    return rnd.random() > threshold
|
|
|
|
|
|
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    """Create a ``TextBlock`` covering ``rectangle`` and fill it with random text.

    A non-bold, non-italic, non-mono serif or sans-serif font is requested. If ``rectangle``
    already is a ``ContentRectangle``, its content is carried over so that the text is put on top
    of it.
    """
    coords = rectangle.coords
    font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif"),
        excludes=("bold", "mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, font=font, font_size=30)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None

    block.generate_random_text(rectangle, n_sentences)
    return block
|
|
|
|
|
|
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    """Create a caption block that starts with "Fig <n>", n being a random integer in 1..20."""
    figure_number = rnd.randint(1, 20)
    return generate_random_caption(rectangle, f"Fig {figure_number}")
|
|
|
|
|
|
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    """Create a caption block that starts with "Tabl <n>", n being a random integer in 1..20."""
    table_number = rnd.randint(1, 20)
    return generate_random_caption(rectangle, f"Tabl {table_number}")
|
|
|
|
|
|
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    """Create a ``TextBlock`` covering ``rectangle`` holding a random caption.

    The text is produced by a ``CaptionGenerator`` seeded with ``caption_start`` and rendered in an
    italic, non-bold, non-mono font. If ``rectangle`` already is a ``ContentRectangle``, its
    content is carried over so that the caption is put on top of it.
    """
    coords = rectangle.coords
    caption_generator = CaptionGenerator(caption_start=caption_start)
    font = pick_random_font_available_on_system(
        includes=("italic",),
        excludes=("bold", "mono"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, text_generator=caption_generator, font=font, font_size=100)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None

    block.generate_random_text(rectangle, n_sentences)
    return block
|
|
|
|
|
|
def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
    """Create a ``TextBlock`` covering ``rectangle`` that shows the given ``text``.

    A serif, sans-serif or bold font that is neither mono nor italic is requested. If ``rectangle``
    already is a ``ContentRectangle``, its content is carried over so that the text is put on top
    of it.
    """
    coords = rectangle.coords
    font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif", "bold"),
        excludes=("mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, font=font, font_size=30)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None

    block.put_text(text, rectangle)
    return block
|
|
|
|
|
|
def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
    """Render ``lines`` top-down onto a transparent RGBA image of the rectangle's size.

    Args:
        lines: The text lines to draw; every line starts at x = 0.
        rectangle: Provides the ``width`` and ``height`` of the created image.
        font: Font used for drawing; a random mono-space font is picked when omitted.

    Returns:
        The RGBA image with the lines drawn in opaque black. For an empty ``lines`` list the
        untouched, fully transparent image is returned.
    """
    font = font or pick_random_mono_space_font_available_on_system()

    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))

    # Without any line there is no first line to measure the line height from (the measurement
    # below would fail), and there is nothing to draw anyway.
    if not lines:
        return image

    draw = ImageDraw.Draw(image)
    # The height of the first line is used as the vertical advance for all lines.
    line_height = draw.textsize(first(lines), font=font)[1]

    for line_number, line in enumerate(lines):
        draw.text((0, line_number * line_height), line, font=font, fill=(0, 0, 0, 255))

    return image
|
|
|
|
|
|
class LineFormatter(abc.ABC):
    """Common base class of the line formatters; declares no members of its own."""
|
|
|
|
|
|
class IdentityLineFormatter(LineFormatter):
    """Line formatter that hands back its input unchanged."""

    def __init__(self):
        pass

    def __call__(self, lines, last_full):
        """Return ``(lines, last_full)`` exactly as received."""
        unchanged = (lines, last_full)
        return unchanged
|
|
|
|
|
|
class ParagraphLineFormatter(LineFormatter):
    """Line formatter that randomly truncates lines to imitate paragraph endings.

    Each call processes the first line of the given lines, truncates it with probability
    ``blank_line_percentage`` (but only if the previously processed line was a full line) and moves
    it to the back of the lines.
    """

    def __init__(self, blank_line_percentage=None):
        """
        Args:
            blank_line_percentage: Probability of truncating a line. Drawn uniformly from
                [0, 0.5] when None.
        """
        # Compare against None explicitly: 0 is a valid setting (a draw from `rnd.random()` is
        # never below 0, so nothing gets truncated) and must not be replaced by a random value.
        if blank_line_percentage is None:
            blank_line_percentage = rnd.uniform(0, 0.5)
        self.blank_line_percentage = blank_line_percentage

    def __call__(self, lines, last_full):
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        """Format the first of ``lines`` and move it to the back.

        Args:
            lines: The lines; the first one is the line to be processed in this step.
            last_full: Whether the line processed in the previous step was left untruncated.

        Returns:
            A pair of the rearranged lines and a bool telling whether the line processed in this
            step was left untruncated.
        """

        def truncate_current_line():
            # The random draw happens on every call; two truncated lines in a row are prevented by
            # requiring the previous line to be a full one.
            return rnd.random() < self.blank_line_percentage and last_full

        # This is meant to be read from the bottom up.
        current_line_shall_not_be_a_full_line = truncate_current_line()
        line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
        format_current_line = compose(line_formatter, first)
        move_current_line_to_back = star(rconj)
        split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
        split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
            split_first_line_from_lines_and_format_the_former,
            move_current_line_to_back,
        )
        current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
        # Start reading here and move up.
        return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line

    def format_line(self, line, full=True):
        """Return ``(line, full)``, with the line truncated when ``full`` is falsy."""
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        """Shorten ``line`` to a few words.

        Keeps up to ``n`` words (``n`` drawn from 0..4) that directly precede the last word of the
        line, removes all periods and appends ".\\n". When no word remains, the empty string is
        returned without that suffix.
        """
        n_trailing_words = rnd.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line
|
|
|
|
|
|
class TextBlockGenerator(abc.ABC):
    """Common base class of the text block generators; declares no members of its own."""
|
|
|
|
|
|
class ParagraphGenerator(TextBlockGenerator):
    """Generates random text lines formatted by a ``ParagraphLineFormatter``."""

    def __init__(self):
        truncation_probability = rnd.uniform(0, 0.5)
        self.line_formatter = ParagraphLineFormatter(blank_line_percentage=truncation_probability)

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines for ``rectangle``, formatted with this generator's formatter."""
        return generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
|
|
|
|
|
|
class CaptionGenerator(TextBlockGenerator):
    """Generates random text lines whose first line is prefixed with a caption start."""

    def __init__(self, caption_start=None):
        self.line_formatter = IdentityLineFormatter()
        # A missing (or otherwise falsy) caption start is replaced by a random figure label.
        self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines with "<caption_start>.: " prepended to the first line."""
        body_lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        caption_line = f"{self.caption_start}.: {first(body_lines)}"
        return conj(caption_line, rest(body_lines))
|
|
|
|
|
|
class TextBlock(ContentRectangle):
    """Content rectangle that renders text into its content.

    ``font`` falls back to PIL's default font and ``text_generator`` to a new
    ``ParagraphGenerator``. ``font_size`` is accepted but not used by this class.
    """

    def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
        super().__init__(x1, y1, x2, y2)
        self.font = font or ImageFont.load_default()  # pick_random_font_available_on_system(size=font_size)
        self.text_generator = text_generator or ParagraphGenerator()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
        """Generate lines with the text generator, render them and store the image; returns self."""
        generated_lines = self.text_generator(rectangle, n_sentences)
        rendered = write_lines_to_image(generated_lines, rectangle, self.font)
        return self.__put_content(rendered)

    def put_text(self, text: str, rectangle: Rectangle):
        """Render ``text`` onto a canvas of the text's size and store the image; returns self.

        If the text is wider or higher than ``rectangle``, the (still empty) canvas is resized to
        90 % of the rectangle's width before the text is drawn; its height stays the text height.
        """
        text_width, text_height = self.font.getsize(text)

        exceeds_width = text_width - rectangle.width > 0
        exceeds_height = text_height - rectangle.height > 0

        canvas = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))

        if exceeds_width or exceeds_height:
            canvas = canvas.resize((int(rectangle.width * 0.9), text_height))

        ImageDraw.Draw(canvas).text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
        return self.__put_content(canvas)

    def __put_content(self, image: Image.Image):
        """Set ``image`` as content, or superimpose it onto already existing content; returns self."""
        if not self.content:
            self.content = image
        else:
            self.content = superimpose_texture_with_transparency(self.content, image)
        assert self.content.mode == "RGBA"
        return self
|
|
|
|
|
|
def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
    """Generate random text, wrap it into lines and run the line formatter over every line.

    Args:
        rectangle: Its ``width`` is used as the maximum number of characters per wrapped line.
        line_formatter: Callable taking ``(lines, last_full)`` and returning a pair of the same
            shape, expected to process the first line and move it to the back of the lines.
            NOTE(review): the default ``identity`` is invoked with two arguments through ``star``;
            confirm it supports that, callers in this file always pass a formatter object.
        n_sentences: Number of sentences of the generated paragraph.

    Returns:
        The formatted lines.
    """
    text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
    unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)

    # each iteration of the line formatter function formats one more line and adds it to the back of the list
    formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True))

    # hence do as many iterations as there are lines in the rectangle. The generator yields the
    # untouched start value first, so n applications of the formatter are only reached at item
    # n + 1; taking just n items would leave the last line unformatted at the front of the result
    # and would yield nothing at all for an empty list of lines.
    lines_per_iteration = take(len(unformatted_lines) + 1, formatted_lines_generator)

    # and then take the lines from the last iteration of the function
    formatted_lines, _ = last(lines_per_iteration)

    return formatted_lines
|
|
|
|
|
|
def paste_content(page, content_box: ContentRectangle):
    """Paste the RGBA content of ``content_box`` onto ``page`` at the box's top-left corner.

    The content doubles as the paste mask. The page is modified in place and returned.
    """
    # assert page.mode == "RGB"
    overlay = content_box.content
    assert overlay.mode == "RGBA"

    top_left = (content_box.x1, content_box.y1)
    page.paste(overlay, top_left, overlay)
    return page
|
|
|
|
|
|
def paste_contents(page, contents: Iterable[ContentRectangle]):
    """Paste every content rectangle onto ``page`` in iteration order and return the page."""
    for content_box in contents:
        paste_content(page, content_box)
    return page
|
|
|
|
|
|
class PagePartitioner(abc.ABC):
    """Base class for strategies that split a page into content boxes.

    Subclasses implement ``generate_content_boxes``; calling an instance with a page applies the
    page margins and returns the flattened boxes produced for the remaining area.
    """

    def __init__(self):
        # Page margins as fractions of the page width / height.
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1

        # Settings for the recursive subdivision.
        self.recursive_margin_percentage = 0.007
        self.max_recursion_depth = 3
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Partition the area of ``page`` inside the margins into a flat list of boxes."""
        left = int(page.width * self.left_margin_percentage)
        right = int(page.width * self.right_margin_percentage)
        top = int(page.height * self.top_margin_percentage)
        bottom = int(page.height * self.bottom_margin_percentage)

        printable_area = Rectangle(left, top, page.width - right, page.height - bottom)
        boxes = lflatten(self.generate_content_boxes(printable_area))
        # boxes = self.drop_small_boxes(boxes, *page.size)
        # boxes = merge_related_rectangles(boxes)
        # boxes = list(boxes)
        return boxes

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis`` ("x" or "y").

        The split position is ``split_percentage`` of the box's extent along the axis, measured
        from the box's ``x1`` respectively ``y1``.
        """
        assert axis in ["x", "y"]

        if axis == "x":
            anchor, extent = box.x1, box.width
        else:
            anchor, extent = box.y1, box.height

        split_coordinate = split_percentage * extent + anchor
        return get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Randomly decide whether to subdivide further at the given depth."""
        return rnd.random() <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        """Return the initial probability scaled by ``(1 - decay) ** depth``."""
        keep_factor = 1 - self.recursion_probability_decay
        return self.initial_recursion_probability * keep_factor**depth
|
|
|
|
|
|
class RandomPagePartitioner(PagePartitioner):
    """Partitioner that splits boxes along a random axis at a random position."""

    def __init__(self):
        super().__init__()

    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield ``box`` at maximum depth, otherwise its two children or generators for them.

        The result is nested (tuples and generators) and is meant to be flattened by the caller.
        """
        if depth >= self.max_recursion_depth:
            yield box
            return

        child_boxes = self.generate_child_boxes(
            box,
            axis=rnd.choice(["x", "y"]),
            split_percentage=rnd.uniform(0.3, 0.7),
        )

        if not self.recurse(depth):
            yield child_boxes
            return

        for child in child_boxes:
            yield self.generate_content_boxes(child, depth + 1)
|
|
|
|
|
|
class TwoColumnPagePartitioner(PagePartitioner):
    """Partitioner that first splits the page into two columns and then splits these vertically."""

    def __init__(self):
        super().__init__()
        # self.recursive_margin_percentage = 0.1
        # self.left_margin_percentage = 0.1
        # self.right_margin_percentage = 0.1
        self.max_recursion_depth = 3

    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield ``box`` at maximum depth, otherwise generators for its two children.

        At depth 0 the box is halved along x; deeper levels split along y at 30 % or 70 %. The
        result is nested and is meant to be flattened by the caller.
        """
        if depth >= self.max_recursion_depth:
            yield box
            return

        if depth == 0:
            axis, split_percentage = "x", 0.5
        else:
            axis, split_percentage = "y", rnd.choice([0.3, 0.7])

        child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)

        for child in child_boxes:
            yield self.generate_content_boxes(child, depth + 1)
|
|
|
|
|
|
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Split ``box`` at ``split_coordinate`` along ``axis`` into two rectangles.

    Every lower coordinate (x1 / y1 side) is multiplied by ``1 + margin_percentage`` and every
    upper coordinate (x2 / y2 side) by ``1 - margin_percentage``; the split coordinate acts as
    upper coordinate of the first and as lower coordinate of the second rectangle.
    """
    assert axis in ["x", "y"]

    grow = 1 + margin_percentage
    shrink = 1 - margin_percentage

    x_low, y_low = box.x1 * grow, box.y1 * grow
    x_high, y_high = box.x2 * shrink, box.y2 * shrink
    split_low, split_high = split_coordinate * grow, split_coordinate * shrink

    if axis == "x":
        first_child = Rectangle(x_low, y_low, split_high, y_high)
        second_child = Rectangle(split_low, y_low, x_high, y_high)
    else:
        first_child = Rectangle(x_low, y_low, x_high, split_high)
        second_child = Rectangle(x_low, split_low, x_high, y_high)

    return first_child, second_child
|
|
|
|
|
|
def drop_small_boxes(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]:
    """Remove the boxes that are narrower or lower than ``min_percentage`` of the page size."""
    width_limit = page_width * min_percentage
    height_limit = page_height * min_percentage

    def is_too_small(candidate: Rectangle):
        if candidate.width < width_limit:
            return True
        return candidate.height < height_limit

    return lremove(is_too_small, boxes)
|
|
|
|
|
|
def draw_boxes(page: Image, boxes: Iterable[Rectangle]):
|
|
# page = draw_rectangles(page, boxes, filled=False, annotate=True)
|
|
show_image(page, backend="pil")
|