1768 lines
56 KiB
Python
1768 lines
56 KiB
Python
import abc
|
|
import io
|
|
import itertools
|
|
import random
|
|
import string
|
|
import sys
|
|
import textwrap
|
|
from copy import deepcopy
|
|
from enum import Enum
|
|
from functools import lru_cache, partial
|
|
from math import sqrt
|
|
from pathlib import Path
|
|
from typing import Tuple, Iterable, List
|
|
|
|
import albumentations as A
|
|
import blend_modes
|
|
import cv2 as cv
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pytest
|
|
from PIL import Image, ImageOps, ImageFont, ImageDraw, ImageEnhance
|
|
from PIL.Image import Transpose
|
|
from faker import Faker
|
|
from loguru import logger
|
|
from matplotlib import pyplot as plt
|
|
from matplotlib.colors import ListedColormap
|
|
from tabulate import tabulate
|
|
|
|
from cv_analysis.table_parsing import isolate_vertical_and_horizontal_components
|
|
from cv_analysis.utils import star, rconj, conj
|
|
from cv_analysis.utils.common import normalize_to_gray_scale
|
|
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
|
|
from cv_analysis.utils.merging import merge_related_rectangles
|
|
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
|
from cv_analysis.utils.spacial import area
|
|
|
|
# Replace loguru's default sink with a stderr sink that only shows INFO and above.
logger.remove()
logger.add(sys.stderr, level="INFO")

# A fresh seed is drawn first ...
random_seed = random.randint(0, 2**32 - 1)

# ... seeds kept around for reproducing earlier runs:
# random_seed = 3896311122
# random_seed = 1986343479
# random_seed = 273244862 # empty large table
# random_seed = 3717442900
# random_seed = 2508340737
# random_seed = 2212357755

# NOTE(review): this assignment pins the seed, so the value drawn above is always discarded.
random_seed = 3400335399

# Module-wide seeded generator; the seed is logged so that a run can be reproduced.
rnd = random.Random(random_seed)
logger.info(f"Random seed: {random_seed}")
|
|
|
|
#
|
|
# transform = A.Compose(
|
|
# [
|
|
# # geometric transforms
|
|
# A.HorizontalFlip(p=0.2),
|
|
# A.RandomRotate90(p=0.2),
|
|
# A.VerticalFlip(p=0.2),
|
|
# # brightness and contrast transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.RandomGamma(p=0.5),
|
|
# A.RandomBrightnessContrast(p=0.5),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# # noise transforms
|
|
# A.SomeOf(
|
|
# [
|
|
# A.Emboss(p=0.05),
|
|
# A.ImageCompression(p=0.05),
|
|
# A.PixelDropout(p=0.05),
|
|
# ],
|
|
# p=0.5,
|
|
# n=2,
|
|
# ),
|
|
# # color transforms
|
|
# A.SomeOf(
|
|
# [
|
|
# A.ColorJitter(p=1),
|
|
# A.RGBShift(p=1, r_shift_limit=0.1, g_shift_limit=0.1, b_shift_limit=0.1),
|
|
# A.ChannelShuffle(p=1),
|
|
# ],
|
|
# p=0.5,
|
|
# n=3, # 3 => all
|
|
# ),
|
|
# # blurring and sharpening transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.GaussianBlur(p=0.05),
|
|
# A.MotionBlur(p=0.05, blur_limit=21),
|
|
# A.Sharpen(p=0.05),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# # environmental transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.RandomRain(p=0.2, rain_type="drizzle"),
|
|
# A.RandomFog(p=0.2, fog_coef_upper=0.4),
|
|
# A.RandomSnow(p=0.2),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# ],
|
|
# p=0.5,
|
|
# )
|
|
from funcy import (
|
|
juxt,
|
|
compose,
|
|
identity,
|
|
lflatten,
|
|
lmap,
|
|
first,
|
|
iterate,
|
|
take,
|
|
last,
|
|
rest,
|
|
rcompose,
|
|
lsplit,
|
|
lfilter,
|
|
lzip,
|
|
keep,
|
|
repeatedly,
|
|
mapcat,
|
|
omit,
|
|
project,
|
|
complement,
|
|
lremove,
|
|
chunks,
|
|
)
|
|
|
|
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
|
|
|
|
# transform = A.Compose(
|
|
# [
|
|
# # brightness and contrast transforms
|
|
# A.OneOf(
|
|
# [
|
|
# A.RandomGamma(p=0.2),
|
|
# A.RandomBrightnessContrast(p=0.2, brightness_limit=0.05, contrast_limit=0.05),
|
|
# ],
|
|
# p=0.5,
|
|
# ),
|
|
# # color transforms
|
|
# A.SomeOf(
|
|
# [
|
|
# A.ColorJitter(p=1),
|
|
# A.RGBShift(p=1, r_shift_limit=0.3, g_shift_limit=0.3, b_shift_limit=0.3),
|
|
# A.ChannelShuffle(p=1),
|
|
# ],
|
|
# p=1.0,
|
|
# n=3, # 3 => all
|
|
# ),
|
|
# # # blurring and sharpening transforms
|
|
# # A.OneOf(
|
|
# # [
|
|
# # A.GaussianBlur(p=0.05),
|
|
# # A.MotionBlur(p=0.05, blur_limit=21),
|
|
# # A.Sharpen(p=0.05),
|
|
# # ],
|
|
# # p=0.0,
|
|
# # ),
|
|
# ]
|
|
# )
|
|
from cv_analysis.utils.display import show_image
|
|
from cv_analysis.utils.rectangle import Rectangle
|
|
|
|
# Albumentations pipeline; it currently composes no transforms.
transform = A.Compose(
    [
        # A.ColorJitter(p=1),
    ]
)

# Alias for a three-component integer colour tuple.
Color = Tuple[int, int, int]
|
|
|
|
|
|
@pytest.fixture(
    params=[
        # "rough_grain",
        # "plain",
        # "digital",
        "crumpled",
    ]
)
def base_texture(request, size):
    """Load the parametrized page texture and scale it to the page size.

    The image is read from ``TEST_PAGE_TEXTURES_DIR / "<param>.jpg"``.

    Args:
        request: pytest request object carrying the texture name in ``request.param``.
        size: target (width, height) in pixels.

    Returns:
        The resized texture as a new PIL image.
    """
    texture_path = TEST_PAGE_TEXTURES_DIR / (request.param + ".jpg")
    # ``resize`` returns an independent image, so the file handle can be released right away.
    with Image.open(texture_path) as source:
        texture = source.resize(size)
    # texture.putalpha(255)  # ISSUE 1!!!
    return texture
|
|
|
|
|
|
@pytest.fixture(
    params=[
        "portrait",
        # "landscape",
    ]
)
def orientation(request):
    """Parametrized page orientation name."""
    selected = request.param
    return selected
|
|
|
|
|
|
@pytest.fixture(
    params=[
        # 30,
        100,
    ]
)
def dpi(request):
    """Parametrized resolution value used to derive the page size."""
    selected = request.param
    return selected
|
|
|
|
|
|
@pytest.fixture(
    params=[
        # "brown",
        # "sepia",
        # "gray",
        "white",
        # "light_red",
        # "light_blue",
    ]
)
def color_name(request):
    """Parametrized name of the page tint; resolved to a hex value by the ``color`` fixture."""
    selected = request.param
    return selected
|
|
|
|
|
|
@pytest.fixture(
    params=[
        # "smooth",
        # "coarse",
        "neutral",
    ]
)
def texture_name(request):
    """Parametrized name of the texture post-processing; consumed by ``texture_fn``."""
    selected = request.param
    return selected
|
|
|
|
|
|
@pytest.fixture(
    params=[
        # 30,
        70,
        # 150,
    ]
)
def color_intensity(request):
    """Parametrized alpha value that ``tinted_blank_page`` applies to the tint layer."""
    selected = request.param
    return selected
|
|
|
|
|
|
def random_flip(image):
    """Mirror the image horizontally and/or vertically, each decided by an independent coin toss.

    The horizontal decision is drawn from ``rnd`` first, then the vertical one.
    """
    for flip in (Transpose.FLIP_LEFT_RIGHT, Transpose.FLIP_TOP_BOTTOM):
        if rnd.choice([True, False]):
            image = image.transpose(flip)
    return image
|
|
|
|
|
|
@pytest.fixture
def color(color_name):
    """Translate a colour name into its hex string; unknown names raise ``KeyError``."""
    hex_by_name = {
        "brown": "#7d6c5b",
        "sepia": "#b8af88",
        "gray": "#9c9c9c",
        "white": "#ffffff",
        "light_red": "#d68c8b",
        "light_blue": "#8bd6d6",
    }
    return hex_by_name[color_name]
|
|
|
|
|
|
@pytest.fixture
def texture_fn(texture_name, size):
    """Build the texture post-processing function selected by ``texture_name``.

    "smooth" blurs, "coarse" overlays a blurred and a sharpened copy, and any other name
    leaves the image unchanged. The result is wrapped by ``normalize_image_function``.
    """
    if texture_name == "coarse":
        fn = compose(overlay, juxt(blur, sharpen))
    else:
        fn = blur if texture_name == "smooth" else identity

    return normalize_image_function(fn)
|
|
|
|
|
|
def blur(image: np.ndarray):
    """Apply OpenCV's box blur with a 3x3 kernel."""
    kernel_size = (3, 3)
    return cv.blur(image, kernel_size)
|
|
|
|
|
|
def normalize_image_function(func):
    """Wrap ``func`` so its input is converted to an array and its output back to a PIL image."""

    def inner(image):
        as_array = normalize_image_format_to_array(image)
        processed = func(as_array)
        return normalize_image_format_to_pil(processed)

    return inner
|
|
|
|
|
|
def sharpen(image: np.ndarray):
    """Convolve the image with a 3x3 kernel of -1 around a centre weight of 6."""
    kernel = np.array(
        [
            [-1, -1, -1],
            [-1, 6, -1],
            [-1, -1, -1],
        ]
    )
    # ddepth=-1 keeps the depth of the source image.
    return cv.filter2D(image, -1, kernel)
|
|
|
|
|
|
def overlay(images, mode=np.sum):
    """Combine equally shaped images into one and rescale the result to the range 0..255.

    Args:
        images: iterable of arrays with identical shape.
        mode: reduction applied across the images, either ``np.sum`` or ``np.max``.

    Returns:
        A ``uint8`` array whose largest value is 255, or an all-zero array when the
        combined image contains no signal.
    """
    assert mode in [np.sum, np.max]
    stacked = np.stack(list(images))
    combined = mode(stacked, axis=0)

    peak = combined.max()
    if peak == 0:
        # Nothing to rescale; dividing by zero would produce NaNs that turn into garbage on the cast.
        return np.zeros(combined.shape, dtype=np.uint8)

    return (combined / peak * 255).astype(np.uint8)
|
|
|
|
|
|
@pytest.fixture
def texture(tinted_blank_page, base_texture):
    """Base texture with the tinted page pasted on top of it."""
    return superimpose_texture_with_transparency(base_texture, tinted_blank_page)
|
|
|
|
|
|
@pytest.fixture
def tinted_blank_page(size, color, color_intensity):
    """RGBA page filled with ``color`` whose alpha channel is set to ``color_intensity``."""
    page = Image.new("RGBA", size, color)
    page.putalpha(color_intensity)
    return page
|
|
|
|
|
|
@pytest.fixture
def blank_page(size, color, color_intensity):
    """Fully transparent white RGBA page of the requested size.

    Creating the page also re-seeds the module-level generator ``rnd`` with ``random_seed``.
    """
    rnd.seed(random_seed)
    transparent_white = (255, 255, 255, 0)
    return Image.new("RGBA", size, color=transparent_white)
|
|
|
|
|
|
def tint_image(src, color="#FFFFFF"):
    """Recolour a four-band image on a black-to-``color`` ramp while keeping its alpha channel."""
    src.load()
    # Unpacking into four names keeps the requirement that the image has exactly four bands.
    _red, _green, _blue, alpha = src.split()
    tinted = ImageOps.colorize(ImageOps.grayscale(src), (0, 0, 0), color)
    tinted.putalpha(alpha)
    return tinted
|
|
|
|
|
|
def color_shift_array(image: np.ndarray, color: Color):
    """Scale the three channels of an image by weights derived from a colour tuple.

    Each channel weight is the colour component divided by the component sum and then by 10.
    The input must be an (..., 3) 3-tensor with values of at most 255; the result is ``uint8``
    and has the same shape.
    """
    assert image.ndim == 3
    assert image.shape[-1] == 3
    assert isinstance(color, tuple)
    assert max(color) <= 255
    assert image.max() <= 255

    components = np.array(color)
    weights = components / components.sum() / 10
    assert max(weights) <= 1

    shifted = (image * weights).astype(np.uint8)
    assert shifted.shape == image.shape

    return shifted
|
|
|
|
|
|
@pytest.fixture
def size(dpi, orientation):
    """Pixel size (width, height) of an 8.5 x 11 page at the given dpi and orientation.

    Raises:
        ValueError: if ``orientation`` is neither "portrait" nor "landscape".
    """
    if orientation not in ("portrait", "landscape"):
        raise ValueError(f"Unknown orientation: {orientation}")

    width_units, height_units = (8.5, 11) if orientation == "portrait" else (11, 8.5)
    return int(width_units * dpi), int(height_units * dpi)
|
|
|
|
|
|
def superimpose_texture_with_transparency(
    page: Image,
    texture: Image,
    crop_to_content=True,
    pad=True,
) -> Image:
    """Paste an RGBA texture onto a page, using the texture itself as the paste mask.

    TODO: Rename page and texture to something more generic.

    Args:
        page: The image that is pasted onto.
        texture: The image that is pasted; must be in mode "RGBA" by the time it is pasted.
        crop_to_content: If True, the texture is first cropped to its bounding box
            (``texture.getbbox()``).
        pad: Only relevant when the sizes differ: if True the texture is padded to the page
            size, otherwise it is resized to the page size.

    Returns:
        The page image with the texture superimposed.
    """
    page = normalize_image_format_to_pil(page)
    texture = normalize_image_format_to_pil(texture)

    if crop_to_content:
        content_box = texture.getbbox()
        texture = texture.crop(content_box)

    sizes_differ = page.size != texture.size
    if sizes_differ:
        logger.trace(f"Size of page and texture do not match: {page.size} != {texture.size}")

    if sizes_differ and pad:
        logger.trace(f"Padding texture before pasting to fit size {page.size}")
        texture = pad_image_to_size(texture, page.size)
    elif sizes_differ:
        logger.trace(f"Resizing texture before pasting to fit size {page.size}")
        texture = texture.resize(page.size)

    assert page.size == texture.size
    assert texture.mode == "RGBA"

    # Third argument: the texture's own alpha acts as the mask.
    page.paste(texture, (0, 0), texture)
    return page
|
|
|
|
|
|
def pad_image_to_size(image: Image, size: Tuple[int, int]) -> Image:
    """Centre an RGBA image on a new canvas of the given size.

    An image that already has the target size is returned unchanged.

    Raises:
        ValueError: if the image exceeds the target size in either dimension.
    """
    if image.size == size:
        return image

    width, height = image.size[0], image.size[1]
    if width > size[0] or height > size[1]:
        raise ValueError(f"Image size {image.size} is larger than target size {size}.")

    # Alternative kept for reference: fill the canvas using the image's average alpha.
    # alpha = int(np.mean(np.array(image.split()[-1]))) or 255
    # padded = Image.new(image.mode, size, color=(255, 255, 255, alpha))
    canvas = Image.new(image.mode, size, color=255)

    offset = compute_pasting_coordinates(image, canvas)
    assert image.mode == "RGBA"
    canvas.paste(image, offset)
    return canvas
|
|
|
|
|
|
def compute_pasting_coordinates(smaller: Image, larger: Image.Image):
    """Return the (x, y) offset at which ``smaller`` sits centred on ``larger``.

    Only the ``width`` and ``height`` attributes of both arguments are read.
    """
    x_offset = abs(larger.width - smaller.width) // 2
    y_offset = abs(larger.height - smaller.height) // 2
    return x_offset, y_offset
|
|
|
|
|
|
@pytest.fixture
def page_with_opaque_content(
    blank_page,
    tinted_blank_page,
    texture,
    texture_fn,
) -> Tuple[np.ndarray, Iterable[Rectangle]]:
    """Create a page by pasting generated content directly onto the processed texture.

    Returns:
        The page and the content boxes that were pasted onto it.
    """
    partitioner_candidates = [
        TwoColumnPagePartitioner(),
        # RandomPagePartitioner(),
    ]
    page_partitioner = rnd.choice(partitioner_candidates)

    texture = texture_fn(random_flip(texture))

    boxes = ContentGenerator()(page_partitioner(texture))
    page = paste_contents(texture, boxes)

    return page, boxes
|
|
|
|
|
|
@pytest.fixture
def page_with_translucent_content(
    blank_page, tinted_blank_page, texture, texture_fn
) -> Tuple[np.ndarray, List[Rectangle]]:
    """Create a page by multiply-blending generated content with the processed texture.

    Returns:
        The blended page as a ``uint8`` array and the content boxes.
    """
    partitioner_candidates = [
        TwoColumnPagePartitioner(),
        # RandomPagePartitioner(),
    ]
    page_partitioner = rnd.choice(partitioner_candidates)

    boxes = ContentGenerator()(page_partitioner(blank_page))
    page_content = paste_contents(blank_page, boxes)

    texture = texture_fn(random_flip(texture))

    # Earlier blending variants, kept for reference:
    ########## A
    # page_content = multiply_alpha_where_alpha_channel_is_nonzero(page_content, factor=0.6)
    # page = superimpose_texture_with_transparency(texture, page_content, crop_to_content=False)
    ########## B
    # texture.putalpha(255)
    # # texture.show()
    # page_content.show()
    # page = blend(*map(np.array, (page_content, texture)))
    ########## C (active): make both layers opaque, raise the texture contrast, then multiply.
    texture.putalpha(255)
    page_content.putalpha(255)
    contrast_factor = 1.2
    texture = ImageEnhance.Contrast(texture).enhance(contrast_factor)

    content_array = to_array(page_content)
    texture_array = to_array(texture)
    page = blend_modes.multiply(content_array, texture_array, opacity=1).astype(np.uint8)
    ##########

    return page, boxes
|
|
|
|
|
|
@pytest.fixture
def page_with_content(
    page_with_translucent_content,
    # page_with_opaque_content,
) -> np.ndarray:
    """Page from the translucent-content fixture, after ``draw_boxes`` was called on it."""
    # Swap in page_with_opaque_content here (and in the signature) to test the other variant.
    page, boxes = page_with_translucent_content

    draw_boxes(page, boxes)

    return page
|
|
|
|
|
|
def to_array(image: Image) -> np.ndarray:
    """Return the image data as a new ``float32`` numpy array."""
    return np.array(image, dtype=np.float32)
|
|
|
|
|
|
def provide_image_format(required_format):
    """Decorator factory that feeds a function its image in the format it requires.

    The decorated function receives its first argument converted to ``required_format``
    ("array" or "pil"). If the caller passed the image in the other format, the return
    value is converted back to that format.

    Args:
        required_format: "array" or "pil".

    Raises:
        KeyError: at decoration time, if ``required_format`` is not one of the two names.
    """
    from functools import wraps

    def decorator(fn):
        converter = {
            "array": normalize_image_format_to_array,
            "pil": normalize_image_format_to_pil,
        }[required_format]

        back_converter = {
            "array": normalize_image_format_to_pil,
            "pil": normalize_image_format_to_array,
        }[required_format]

        @wraps(fn)  # keep the name and docstring of the decorated function
        def wrapper(image, *args, **kwargs):
            ret = fn(converter(image), *args, **kwargs)

            # Hand the result back in the format the caller used.
            if get_image_format(image) != required_format:
                ret = back_converter(ret)

            return ret

        return wrapper

    return decorator
|
|
|
|
|
|
@provide_image_format("array")
def set_alpha_where_color_channels_are_nonzero(image: np.ndarray, alpha: int) -> np.ndarray:
    """Return a copy whose alpha is ``alpha`` wherever any non-alpha channel is above zero."""
    assert image.ndim == 3
    assert image.shape[-1] == 4
    assert 0 <= alpha <= 255

    result = image.copy()
    has_color = (result[..., :-1] > 0).any(axis=-1)
    result[has_color, -1] = alpha
    return result
|
|
|
|
|
|
@provide_image_format("array")
def multiply_alpha_where_alpha_channel_is_nonzero(image: np.ndarray, factor: float) -> np.ndarray:
    """Return a ``float32`` copy whose positive alpha values are scaled by ``factor``.

    The alpha channel is clipped to 0..255 afterwards.
    """
    assert image.ndim == 3
    assert image.shape[-1] == 4

    scaled = image.copy().astype(np.float32)
    alpha = scaled[..., -1]
    scaled[..., -1] = np.clip(np.where(alpha > 0, alpha * factor, alpha), 0, 255)

    assert scaled.max() <= 255
    assert scaled.min() >= 0
    return scaled
|
|
|
|
|
|
@provide_image_format("array")
def set_alpha_where_alpha_channel_is_nonzero(image: np.ndarray, alpha: int) -> np.ndarray:
    """Return a copy in which every positive alpha value is replaced by ``alpha``."""
    assert image.ndim == 3
    assert image.shape[-1] == 4
    assert 0 <= alpha <= 255

    result = image.copy()
    is_visible = result[..., -1] > 0
    result[is_visible, -1] = alpha
    return result
|
|
|
|
|
|
def get_image_format(image):
    """Classify an image object as "array" (numpy) or "pil" (PIL image).

    Raises:
        ValueError: for any other type.
    """
    if isinstance(image, np.ndarray):
        return "array"
    if isinstance(image, Image.Image):
        return "pil"
    raise ValueError(f"Unknown image format: {type(image)}")
|
|
|
|
|
|
def blend(a: np.ndarray, b: np.ndarray):
    """Blend two images given as arrays with values in 0..255.

    Where ``a`` (scaled to 0..1) is below 0.5 the result is ``2ab``; elsewhere it is
    ``1 - 2(1 - a)(1 - b)``.

    Reference: https://stackoverflow.com/a/52143032

    Args:
        a: first image.
        b: second image.

    Returns:
        The blended image as a ``uint8`` array.
    """
    assert a.max() <= 255
    assert a.min() >= 0

    assert b.max() <= 255
    assert b.min() >= 0

    # Work on floats in the range 0..1.
    a = a.astype(float) / 255
    b = b.astype(float) / 255

    mask = a >= 0.5  # True wherever a is at least 0.5
    ab = np.zeros_like(a)  # output container for the blended image

    ab[~mask] = (2 * a * b)[~mask]  # 2ab wherever a < 0.5
    ab[mask] = (1 - 2 * (1 - a) * (1 - b))[mask]  # the second formula elsewhere

    assert ab.max() <= 1
    assert ab.min() >= 0

    return (ab * 255).astype(np.uint8)
|
|
|
|
|
|
class ContentRectangle(Rectangle):
    """Rectangle that additionally carries a ``content`` payload."""

    def __init__(self, x1, y1, x2, y2, content=None):
        super().__init__(x1, y1, x2, y2)
        self.content = content

    def __repr__(self):
        class_name = self.__class__.__name__
        return f"{class_name}({self.x1}, {self.y1}, {self.x2}, {self.y2}, content={self.content})"
|
|
|
|
|
|
class ContentGenerator:
    """Fills page partition boxes with generated text blocks, tables and plots."""

    def __init__(self):
        # When set, related boxes are merged up front and included/overlapping results are dropped.
        self.constrain_layouts = True

    def __call__(self, boxes: List[Rectangle]) -> List[Rectangle]:
        """Generate content for the given boxes.

        Square-like boxes are treated as figure boxes, all others as text boxes. Alternating
        text boxes become text blocks and tables with captions; alternating figure boxes
        become tables with captions and plots with captions.

        Args:
            boxes: the boxes to fill; the caller's sequence is left untouched.

        Returns:
            The list of generated content boxes.
        """
        # Shuffle a private copy so that the caller's list is not reordered as a side effect.
        boxes = list(boxes)
        rnd.shuffle(boxes)

        figure_boxes, text_boxes = lsplit(is_square_like, boxes)

        if self.constrain_layouts:
            figure_boxes = merge_related_rectangles(figure_boxes)
            # Merging can change the aspect ratio, so the figure boxes are filtered again.
            figure_boxes = lfilter(is_square_like, figure_boxes)
            text_boxes = merge_related_rectangles(text_boxes)

        boxes = list(
            itertools.chain(
                map(generate_random_text_block, every_nth(2, text_boxes)),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
                *zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
            )
        )

        if self.constrain_layouts:
            boxes = remove_included(boxes)
            boxes = remove_overlapping(boxes)

        return boxes
|
|
|
|
|
|
def zipmap(fn, boxes, n=2):
    """Apply ``fn`` to every box and yield the results regrouped by position.

    ``fn`` returns a tuple per box; the i-th yielded list collects the i-th tuple entries.
    If fewer than ``n`` groups come out (e.g. no boxes at all), ``n`` empty lists are
    yielded instead.
    """
    groups = [list(group) for group in zip(*map(fn, boxes))]
    if len(groups) < n:
        yield from ([] for _ in range(n))
    else:
        yield from groups
|
|
|
|
|
|
def is_square_like(box: Rectangle):
    """Tell whether neither side of the box is less than or equal to half of the other.

    The ratios are compared by multiplication, so a box with a zero width or height is
    reported as not square-like instead of raising ``ZeroDivisionError``.
    """
    return box.width > 0.5 * box.height and box.height > 0.5 * box.width
|
|
|
|
|
|
def is_wide(box: Rectangle):
    """Tell whether the box is more than 1.5 times as wide as it is high.

    The ratio is compared by multiplication, so a zero-height box does not raise
    ``ZeroDivisionError``; it counts as wide if it has any width at all.
    """
    return box.width > 1.5 * box.height
|
|
|
|
|
|
def is_tall(box: Rectangle):
    """Tell whether the box is more than 1.5 times as high as it is wide.

    The ratio is compared by multiplication, so a zero-width box does not raise
    ``ZeroDivisionError``; it counts as tall if it has any height at all.
    """
    return box.height > 1.5 * box.width
|
|
|
|
|
|
def every_nth(n, iterable):
    """Lazily yield every ``n``-th item of ``iterable``, beginning with the first one."""
    return itertools.islice(iterable, None, None, n)
|
|
|
|
|
|
def generate_random_plot_with_caption(rectangle: Rectangle):
    """Split the rectangle into a figure and a caption part and fill them with a plot and a caption.

    Returns:
        The (plot, caption) pair of content boxes.
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    plot = generate_random_plot(figure_area)
    caption = generate_random_image_caption(caption_area)
    return plot, caption
|
|
|
|
|
|
# TODO: deduplicate with generate_random_table_with_caption
def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    """Split the rectangle into a figure and a caption part and fill them with a table and a caption.

    Whether the table gets double rules is decided by ``probably()``.

    Returns:
        The (table, caption) pair of content boxes.
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    use_double_rule = probably()
    table = generate_recursive_random_table(figure_area, double_rule=use_double_rule)
    caption = generate_random_table_caption(caption_area)
    return table, caption
|
|
|
|
|
|
def split_into_figure_and_caption(rectangle: Rectangle):
    """Cut a rectangle horizontally into an upper figure box and a lower caption box.

    The cut lies at a random 50-90 % of the height, and a random gap of up to 3 % of the
    height is left between the two boxes.
    """
    gap_percentage = rnd.uniform(0, 0.03)
    split_point = rnd.uniform(0.5, 0.9)

    figure_bottom = rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
    caption_top = rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2)

    figure_box = Rectangle(rectangle.x1, rectangle.y1, rectangle.x2, figure_bottom)
    caption_box = Rectangle(rectangle.x1, caption_top, rectangle.x2, rectangle.y2)
    return figure_box, caption_box
|
|
|
|
|
|
def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
    """Create a ``RandomPlot`` covering the rectangle and let it generate its plot.

    Content that the rectangle already carries (if it is a ``ContentRectangle``) is handed
    over to the plot before generation.

    Args:
        rectangle: area the plot should occupy.

    Returns:
        The plot block.
    """
    block = RandomPlot(*rectangle.coords)
    # block.content = attrgetter("content")(block)
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.generate_random_plot(rectangle)
    return block
|
|
|
|
|
|
def generate_random_table(rectangle: Rectangle) -> ContentRectangle:
    """Create a ``RandomTable`` covering the rectangle and let it generate its table.

    Content that the rectangle already carries is handed over to the table first.
    """
    table = RandomTable(*rectangle.coords)
    # TODO: Refactor
    carries_content = isinstance(rectangle, (ContentRectangle, RandomContentRectangle))
    table.content = rectangle.content if carries_content else None
    table.generate_random_table(rectangle)
    return table
|
|
|
|
|
|
def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRectangle:
    """Create a ``RecursiveRandomTable`` covering the rectangle and let it generate its table.

    Keyword arguments are forwarded to the table constructor. If the rectangle is itself a
    ``RecursiveRandomTable``, its content is carried over (a falsy content becomes ``None``).
    """
    table = RecursiveRandomTable(*rectangle.coords, **kwargs)
    if isinstance(rectangle, RecursiveRandomTable):
        table.content = rectangle.content or None  # TODO: Refactor
    table.generate_random_table()
    return table
|
|
|
|
|
|
@lru_cache(maxsize=None)
def get_random_seed():
    """Draw a 32-bit seed from ``rnd``.

    The function takes no arguments and is cached, so every call after the first one
    returns the value of the first draw.
    """
    upper_bound = 2**32 - 1
    return rnd.randint(0, upper_bound)
|
|
|
|
|
|
class RandomContentRectangle(ContentRectangle):
    """Content rectangle that owns a seeded random generator.

    Args:
        x1, y1, x2, y2: rectangle coordinates.
        content: optional content payload.
        seed: seed for the instance generator; ``get_random_seed()`` is used when omitted.
    """

    def __init__(self, x1, y1, x2, y2, content=None, seed=None):
        super().__init__(x1, y1, x2, y2, content)
        # Compare against None explicitly: 0 is a legitimate seed and must not be replaced.
        self.seed = seed if seed is not None else get_random_seed()
        self.random = random.Random(self.seed)
|
|
|
|
|
|
class Size(Enum):
    """Size thresholds that are compared against the value returned by ``get_size``."""

    # Earlier definitions, kept for reference:
    # SMALL = sqrt(100**2)
    # MEDIUM = sqrt((100 * 3) ** 2)
    # LARGE = sqrt((100 * 10) ** 2)

    SMALL = 120
    MEDIUM = 180
    LARGE = 300
|
|
|
|
|
|
def get_size_class(rectangle: Rectangle):
    """Classify a rectangle by its size as SMALL, MEDIUM or LARGE.

    Sizes below ``Size.SMALL.value`` are SMALL, sizes below ``Size.LARGE.value`` are MEDIUM,
    and everything else is LARGE. ``Size.MEDIUM.value`` is not used as a threshold here.
    """
    size = get_size(rectangle)
    if size < Size.SMALL.value:
        return Size.SMALL
    if size < Size.LARGE.value:
        return Size.MEDIUM
    return Size.LARGE
|
|
|
|
|
|
def get_size(rectangle: Rectangle):
    """Return the square root of the rectangle's area."""
    return sqrt(area(rectangle))
|
|
|
|
|
|
def get_random_color_complementing_color_map(colormap):
    """Return the complement of the colour that ``colormap`` yields at position 0.2.

    The colour map is called with 0.2, its first three components are scaled to 0..255
    integers, and each component is subtracted from the sum of the smallest and the
    largest component.

    Reference: https://stackoverflow.com/a/40234924
    """
    red, green, blue = (int(255 * component) for component in colormap(0.2)[:3])
    low_plus_high = min(red, green, blue) + max(red, green, blue)
    return tuple(low_plus_high - channel for channel in (red, green, blue))
|
|
|
|
|
|
@lru_cache(maxsize=None)
def get_random_background_color():
    """Build an RGBA background colour from a picked colour map plus a random alpha in 100..210.

    The function takes no arguments and is cached, so the colour of the first call is
    returned by every later call.
    """
    rgb = get_random_color_complementing_color_map(pick_colormap())
    alpha = rnd.randint(100, 210)
    return (*rgb, alpha)
|
|
|
|
|
|
class RecursiveRandomTable(RandomContentRectangle):
    """Randomly generated table whose cells hold text, plots or nested tables."""

    def __init__(self, x1, y1, x2, y2, border_width=1, layout: str = None, double_rule=False):
        """A table with a random number of rows and columns, and random content in each cell.

        Args:
            x1: x-coordinate of the top-left corner
            y1: y-coordinate of the top-left corner
            x2: x-coordinate of the bottom-right corner
            y2: y-coordinate of the bottom-right corner
            border_width: width of the table border
            layout: layout of the table, either "horizontal", "vertical", "closed", or "open";
                picked at random when omitted
            double_rule: whether to use double rules as the top and bottom rules
        """

        assert layout in [None, "horizontal", "vertical", "closed", "open"]

        super().__init__(x1, y1, x2, y2)

        self.double_rule = double_rule
        # Vertical space reserved at the top and at the bottom for the double rule.
        self.double_rule_width = (3 * border_width) if self.double_rule else 0

        self.n_columns = rnd.randint(1, max(self.width // 100, 1))
        self.n_rows = rnd.randint(1, max((self.height - 2 * self.double_rule_width) // rnd.randint(17, 100), 1))
        self.cell_size = (self.width / self.n_columns, (self.height - 2 * self.double_rule_width) / self.n_rows)

        # Start from a fully transparent canvas of the table's size.
        self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))

        self.background_color = get_random_background_color()

        logger.info(f"Background color: {self.background_color}")

        self.layout = layout or self.pick_random_layout()
        logger.debug(f"Layout: {self.layout}")

        self.cells = None

    def pick_random_layout(self):
        """Pick a layout among those this method allows for the table's row and column count."""

        if self.n_columns == 1 and self.n_rows == 1:
            layout = "closed"
        elif self.n_columns == 1:
            layout = rnd.choice(["vertical", "closed"])
        elif self.n_rows == 1:
            layout = rnd.choice(["horizontal", "closed"])
        else:
            layout = rnd.choice(["closed", "horizontal", "vertical", "open"])

        return layout

    def generate_random_table(self):
        """Generate the cells, fill them, draw their borders and paste them onto the table."""
        cells = self.generate_table()
        cells = list(self.fill_cells_with_content(cells))
        self.cells = list(self.draw_cell_borders(cells))

        self.content = paste_contents(self.content, cells)
        assert self.content.mode == "RGBA"

    def fill_cells_with_content(self, cells):
        """Yield a content block for every cell; each block must carry RGBA content."""
        for cell in cells:
            cell = self._generate_cell_content(cell)

            assert cell.content.mode == "RGBA"

            yield cell

    def _generate_cell_content(self, cell):
        """Choose and generate the content of a single cell based on the cell's size."""
        # This first draw only shows up in the label of oversized cells (last branch).
        choice = rnd.choice(["text", "plot", "recurse", "plain_table", "blank"])
        size = get_size(cell)

        if size <= Size.SMALL.value:
            words = generate_random_words(1, 3)
            return generate_text_block(cell, " ".join(words))

        if size <= Size.MEDIUM.value:
            choice = rnd.choice(["plot", "recurse"])

            if choice == "plot":
                return generate_random_plot(cell)

            return self._generate_nested_table(cell)

        if size <= Size.LARGE.value:
            choice = rnd.choice(["plot", "recurse"])

            logger.debug(f"Generating {choice} {size:.0f} {get_size_class(cell).name}")

            if choice == "plot" and is_square_like(cell):
                return generate_random_plot(cell)

            logger.debug(f"recurse {size:.0f} {get_size_class(cell).name}")
            return self._generate_nested_table(cell)

        return generate_text_block(cell, f"{choice} {size:.0f} {get_size_class(cell).name}")

    @staticmethod
    def _generate_nested_table(cell):
        """Generate a nested table without double rule and with a random non-closed layout."""
        return generate_recursive_random_table(
            cell,
            border_width=1,
            # Drawn from the seeded module generator so that runs stay reproducible.
            layout=rnd.choice(["open", "horizontal", "vertical"]),
            double_rule=False,
        )

    def draw_cell_borders(self, cells: List[ContentRectangle]):
        """Draw the inner cell borders, the outer frame and the double rules.

        Yields every cell after its borders were drawn, then the table itself once per
        table-level decoration ("closed" frame, double rule) that was applied.
        """

        def draw_edges_based_on_position(cell: Cell, col_idx, row_index):
            # Draw the borders of the cell based on its position in the table
            if col_idx < self.n_columns - 1:
                cell.draw_right_border()

            if row_index < self.n_rows - 1:
                cell.draw_bottom_border()

        # Cells are generated column by column, so chunks of n_rows cells are columns.
        columns = chunks(self.n_rows, cells)
        for col_idx, column in enumerate(columns):
            for row_index, cell in enumerate(column):
                # TODO: Refactor
                # The helper cell shares the content image object, so drawing on it draws on the cell.
                c = Cell(*cell.coords, self.background_color)
                c.content = cell.content
                draw_edges_based_on_position(c, col_idx, row_index)
                yield cell

        if self.layout == "closed":
            # TODO: Refactor
            c = Cell(*self.coords, self.background_color)
            c.content = self.content
            c.draw()
            yield self

        # TODO: Refactor
        if self.double_rule:
            c1 = Cell(*self.coords)
            c1.draw_top_border(width=1)
            c1.draw_bottom_border(width=1)

            # self.content = superimpose_texture_with_transparency(c.content, self.content)

            x1, y1, x2, y2 = self.coords
            c2 = Cell(x1, y1 + self.double_rule_width, x2, y2 - self.double_rule_width)
            c2.draw_top_border(width=1)
            c2.draw_bottom_border(width=1)

            c = superimpose_texture_with_transparency(c1.content, c2.content)

            self.content = superimpose_texture_with_transparency(c, self.content)

            yield self

    def generate_table(self) -> Iterable[ContentRectangle]:
        """Yield all cells of the table, column by column."""
        yield from mapcat(self.generate_column, range(self.n_columns))

    def generate_column(self, column_index) -> Iterable[ContentRectangle]:
        """Yield the cells of one column from the first row to the last."""
        logger.trace(f"Generating column {column_index}.")
        generate_cell_for_row_index = partial(self.generate_cell, column_index)
        yield from map(generate_cell_for_row_index, range(self.n_rows))

    def generate_cell(self, column_index, row_index) -> ContentRectangle:
        """Create the cell at the given grid position, offset by the double rule width."""
        w, h = self.cell_size
        x1, y1 = (column_index * w), (row_index * h) + self.double_rule_width
        x2, y2 = x1 + w, y1 + h
        logger.trace(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
        return Cell(x1, y1, x2, y2, self.background_color)

    def generate_column_names(self):
        """Return one generated column name per column."""
        column_names = repeatedly(self.generate_column_name, self.n_columns)
        return column_names

    def generate_column_name(self):
        """Return one to three random words for use as a column name."""
        column_name = generate_random_words(1, 3)
        return column_name
|
|
|
|
|
|
class Cell(ContentRectangle):
    """Table cell with a filled RGBA background and methods for drawing its borders.

    Args:
        x1, y1, x2, y2: cell coordinates.
        color: RGBA background colour; transparent white when omitted.

    All drawing methods return the cell itself so that calls can be chained.
    """

    def __init__(self, x1, y1, x2, y2, color=None):
        super().__init__(x1, y1, x2, y2)

        self.background_color = color or (255, 255, 255, 0)

        # to debug use random border color: tuple([random.randint(100, 200) for _ in range(3)] + [255])
        self.cell_border_color = (0, 0, 0, 255)

        self.border_width = 1
        # Offset that the right and bottom borders keep from the cell's far edge.
        self.inset = 1

        self.content = Image.new("RGBA", (self.width, self.height))
        self.fill()

    def draw_top_border(self, width=None):
        """Draw the border along the top edge."""
        self.draw_line((0, 0, self.width - self.inset, 0), width=width)
        return self

    def draw_bottom_border(self, width=None):
        """Draw the border along the bottom edge."""
        self.draw_line((0, self.height - self.inset, self.width - self.inset, self.height - self.inset), width=width)
        return self

    def draw_left_border(self, width=None):
        """Draw the border along the left edge."""
        self.draw_line((0, 0, 0, self.height), width=width)
        return self

    def draw_right_border(self, width=None):
        """Draw the border along the right edge."""
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, self.height), width=width)
        return self

    def draw_line(self, points, width=None):
        """Draw a line in the border colour onto the cell content.

        Args:
            points: (x1, y1, x2, y2) in cell-local coordinates.
            width: line width; the cell's ``border_width`` when omitted (or 0).
        """
        width = width or self.border_width
        draw = ImageDraw.Draw(self.content)
        draw.line(points, width=width, fill=self.cell_border_color)
        return self

    def draw(self, width=None):
        """Draw all four borders."""
        self.draw_top_border(width=width)
        self.draw_bottom_border(width=width)
        self.draw_left_border(width=width)
        self.draw_right_border(width=width)
        return self

    def _draw_point(self, x, y, width=None):
        """Draw a zero-length line, i.e. a single mark, at the given position."""
        return self.draw_line((x, y, x, y), width=width)

    def draw_top_left_corner(self, width=None):
        """Mark the top-left corner."""
        return self._draw_point(0, 0, width=width)

    def draw_top_right_corner(self, width=None):
        """Mark the top-right corner."""
        return self._draw_point(self.width - self.inset, 0, width=width)

    def draw_bottom_left_corner(self, width=None):
        """Mark the bottom-left corner."""
        return self._draw_point(0, self.height - self.inset, width=width)

    def draw_bottom_right_corner(self, width=None):
        """Mark the bottom-right corner."""
        return self._draw_point(self.width - self.inset, self.height - self.inset, width=width)

    def fill(self, color=None):
        """Put the current content on top of a layer filled with ``color``.

        Args:
            color: fill colour; the cell's background colour when omitted.
        """
        color = color or self.background_color
        image = Image.new("RGBA", (self.width, self.height), color=color)
        self.content = superimpose_texture_with_transparency(image, self.content)
        return self
|
|
|
|
|
|
def generate_random_words(n_min, n_max):
    """Return between ``n_min`` and ``n_max`` (inclusive) words generated by Faker."""
    word_count = rnd.randint(n_min, n_max)
    return Faker().words(word_count)
|
|
|
|
|
|
def shrink_rectangle(rectangle: Rectangle, factor: float) -> Rectangle:
    """Return a centred copy of the rectangle scaled down to ``1 - factor`` of its size.

    A ``ContentRectangle`` input yields a ``ContentRectangle`` that carries the same content.
    """
    scaled_coords = compute_scaled_coordinates(rectangle, (1 - factor))
    x1, y1, x2, y2 = scaled_coords

    logger.trace(f"Shrinking {rectangle} by {factor} to ({x1}, {y1}, {x2}, {y2}).")

    # The shrunk rectangle must lie within the original one.
    assert x1 >= rectangle.x1
    assert y1 >= rectangle.y1
    assert x2 <= rectangle.x2
    assert y2 <= rectangle.y2

    plain = Rectangle(x1, y1, x2, y2)
    if not isinstance(rectangle, ContentRectangle):  # TODO: Refactor
        return plain

    return ContentRectangle(*plain.coords, rectangle.content)
|
|
|
|
|
|
def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int, int, int, int]:
    """Compute the coordinates of the rectangle scaled by ``factor`` around its centre.

    The scaled width and height are truncated to integers, and the scaled box is placed
    so that it is centred within the original rectangle (offsets use floor division).

    Args:
        rectangle: object providing ``x1``, ``y1``, ``width`` and ``height``.
        factor: scale factor applied to width and height.

    Returns:
        The tuple (x1, y1, x2, y2) of the scaled rectangle.
    """
    # Plain arithmetic; no image has to be allocated just to derive the numbers.
    scaled_width = int(rectangle.width * factor)
    scaled_height = int(rectangle.height * factor)

    x1 = rectangle.x1 + abs(rectangle.width - scaled_width) // 2
    y1 = rectangle.y1 + abs(rectangle.height - scaled_height) // 2
    x2, y2 = x1 + scaled_width, y1 + scaled_height
    return x1, y1, x2, y2
|
|
|
|
|
|
class RandomTable(RandomContentRectangle):
    """Table rendered from an ASCII-formatted random dataframe."""

    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)
        self.font = pick_random_mono_space_font_available_on_system(includes=("bold",), excludes=("italic", "oblique"))

    def generate_random_table(self, rectangle: Rectangle):
        """Generates the image of a random table.

        The table image becomes the content of this block; if the block already has
        content, the table image is superimposed onto it.

        Reference: https://stackoverflow.com/questions/35634238/how-to-save-a-pandas-dataframe-table-as-a-png
        """

        text_table = self.generate_random_ascii_table(rectangle)
        table_lines = text_table.split("\n")
        image = write_lines_to_image(table_lines, rectangle)
        self.join_lines(image)

        self.content = image if not self.content else superimpose_texture_with_transparency(self.content, image)

    def generate_random_dataframe(self, rectangle: Rectangle):
        """Generates a random dataframe that has as many rows and columns as to fit the given rectangle.

        Row and column counts are derived from the height of the default font; the number
        of columns is capped by the number of upper-case ASCII letters used as column
        names. Values are integers in 0..99 drawn from the seeded module generator ``rnd``.
        """

        image = Image.new("RGB", (rectangle.width, rectangle.height), color="white")
        draw = ImageDraw.Draw(image)
        # NOTE(review): ImageDraw.textsize is deprecated in recent Pillow releases - confirm the pinned version.
        text_size = draw.textsize("dummy", font=ImageFont.load_default())[1]

        col_names = list(string.ascii_uppercase)

        rows = int(rectangle.height // text_size)
        cols = int(min(rectangle.width // text_size, len(col_names)))

        # Draw from ``rnd`` rather than numpy's global generator so the logged seed reproduces the data.
        values = np.array([rnd.randint(0, 99) for _ in range(rows * cols)], dtype=np.int64).reshape(rows, cols)

        df = pd.DataFrame(
            values,
            columns=col_names[:cols],
        )

        return df

    def join_lines(self, table: Image.Image):
        """Isolate the grid lines of the table image.

        NOTE(review): the computed grid is neither returned nor stored, so this currently
        has no effect on the table - confirm the intended follow-up.
        """
        table = normalize_image_format_to_array(table)
        table = normalize_to_gray_scale(table)
        grid = isolate_vertical_and_horizontal_components(table)
        # grid = cv2.bitwise_not(grid)

    def generate_random_ascii_table(self, rectangle: Rectangle):
        """Render a random dataframe as an ASCII table using ``tabulate``."""
        df = self.generate_random_dataframe(rectangle)
        table_format = rnd.choice(
            [
                # "simple",
                "grid",
                # "presto",
                # "psql",
                # "rst",
            ]
        )
        text_table = tabulate(df, headers="keys", tablefmt=table_format)
        return text_table
|
|
|
|
|
|
def dump_plt_to_image(rectangle):
    """Render the current pyplot figure to a PIL image sized like ``rectangle``.

    The figure is saved as a transparent PNG into an in-memory buffer, decoded
    and resized to ``(rectangle.width, rectangle.height)``. The buffer and the
    pyplot figure are released even if saving or resizing fails.

    Args:
        rectangle: Object providing the target ``width`` and ``height``.

    Returns:
        The resized image.
    """
    try:
        with io.BytesIO() as buf:
            plt.savefig(buf, format="png", transparent=True)
            buf.seek(0)
            # resize() decodes the PNG and returns a new image, so the result
            # is still usable once the buffer has been closed.
            return Image.open(buf).resize((rectangle.width, rectangle.height))
    finally:
        plt.close()
|
|
|
|
|
|
class RandomFontPicker:
    """Picks fonts at random from the font files reported by ``get_fonts``.

    Args:
        font_dir: Directory handed to ``get_fonts``; ``None`` selects its default.
        return_default_font: If true, every pick returns ``ImageFont.load_default()``
            and the font files are never loaded.
    """

    def __init__(self, font_dir=None, return_default_font=False):
        fonts = get_fonts(font_dir)
        fonts_lower = [font.lower() for font in fonts]
        domestic_fonts_mask = lmap(complement(self.looks_foreign), fonts_lower)
        # `fonts` and `fonts_lower` are parallel sequences: same order, same length.
        self.fonts = list(itertools.compress(fonts, domestic_fonts_mask))
        self.fonts_lower = list(itertools.compress(fonts_lower, domestic_fonts_mask))

        # Scratch canvas used by font_is_renderable().
        self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
        self.draw = ImageDraw.Draw(self.test_image)
        self.return_default_font = return_default_font

    def looks_foreign(self, font):
        # This filters out foreign fonts (e.g. 'Noto Serif Malayalam')
        # Heuristic: the part of the name before the first "-" has more than 10 characters.
        return len(font.split("-")[0]) > 10

    def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont:  # FIXME: Slow!
        """Return a random loadable font whose name passes the given filters.

        Args:
            includes: Substrings of which at least one must occur in the lower-cased
                font name. Empty or ``None`` disables this filter.
            excludes: Substrings none of which may occur in the lower-cased font name.
                Empty or ``None`` disables this filter.

        Returns:
            The first matching font that could be loaded after shuffling. PIL's
            default font is returned if ``return_default_font`` is set or if no
            matching font could be loaded.
        """
        if self.return_default_font:
            return ImageFont.load_default()

        includes = [i.lower() for i in includes] if includes else []
        excludes = [i.lower() for i in excludes] if excludes else []

        logger.debug(f"Picking font by includes={includes} and excludes={excludes}.")

        def includes_pattern(font):
            return not includes or any(include in font for include in includes)

        def excludes_pattern(font):
            return not excludes or not any(exclude in font for exclude in excludes)

        self.shuffle_fonts()

        mask = lmap(lambda f: includes_pattern(f) and excludes_pattern(f), self.fonts_lower)
        fonts = itertools.compress(self.fonts, mask)
        # load_font() yields None for files that cannot be opened; keep() drops those.
        fonts = keep(map(self.load_font, fonts))
        # fonts = filter(self.font_is_renderable, fonts)  # FIXME: this does not work

        font = first(fonts)
        if font is None:
            # Previously this fell through to `font.getname()` and failed with an AttributeError.
            logger.warning(
                f"No loadable font for includes={includes} and excludes={excludes}; using the default font."
            )
            return ImageFont.load_default()

        logger.info(f"Using font: {font.getname()}")
        return font

    def shuffle_fonts(self):
        """Shuffle ``fonts`` and ``fonts_lower`` together so that they stay aligned."""
        pairs = lzip(self.fonts, self.fonts_lower)
        if not pairs:
            # Nothing to shuffle; unpacking the transposed empty list below would raise.
            return
        rnd.shuffle(pairs)
        self.fonts, self.fonts_lower = lzip(*pairs)

    def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
        """Pick a font with "mono" in its name, excluding names containing "oblique"."""
        return self.pick_random_font_available_on_system(includes=["mono"], excludes=["oblique"])

    # NOTE(review): lru_cache on a method keys on `self` and keeps the instance alive
    # for as long as the cache exists.
    @lru_cache(maxsize=None)
    def load_font(self, font: str):
        """Load ``font`` at size 11; returns ``None`` if PIL raises ``OSError``."""
        logger.trace(f"Loading font: {font}")
        try:
            return ImageFont.truetype(font, size=11)
        except OSError:
            return None

    @lru_cache(maxsize=None)
    def font_is_renderable(self, font):
        """Return whether a test string rendered with ``font`` has a non-zero width and height."""
        text_size = self.draw.textsize("Test String", font=font)
        return text_size[0] > 0 and text_size[1] > 0
|
|
|
|
|
|
def get_fonts(path: Path = None) -> List[str]:
    """Return the file names of all ``*.ttf`` files found recursively below ``path``.

    Args:
        path: Directory to search, as a ``Path`` or a plain string. Any falsy
            value selects ``/usr/share/fonts``.

    Returns:
        File names only (no directory part), in the order the file system yields them.
    """
    # Wrapping in Path() lets callers pass a string; a Path passes through unchanged.
    root = Path(path or "/usr/share/fonts")
    return [font_file.name for font_file in root.rglob("*.ttf")]
|
|
|
|
|
|
@lru_cache(maxsize=None)
def get_font_picker(**kwargs):
    """Return a ``RandomFontPicker`` memoised per keyword-argument combination.

    ``return_default_font`` defaults to ``True`` but can be overridden by the
    caller. Before, passing it explicitly failed with a duplicate-keyword
    ``TypeError``. All keyword values must be hashable because of the cache.
    """
    options = {"return_default_font": True, **kwargs}
    return RandomFontPicker(**options)
|
|
|
|
|
|
@lru_cache(maxsize=None)
def pick_random_mono_space_font_available_on_system(**kwargs):
    """Return a monospace font from the shared font picker, memoised per argument set.

    ``includes`` and ``excludes`` are removed before the picker is looked up and
    are not forwarded to the pick itself; every other keyword goes to
    ``get_font_picker``.
    """
    picker_options = omit(kwargs, ["includes", "excludes"])
    picker = get_font_picker(**picker_options)
    return picker.pick_random_mono_space_font_available_on_system()
|
|
|
|
|
|
@lru_cache(maxsize=None)
def pick_random_font_available_on_system(**kwargs):
    """Return a font from the shared font picker, memoised per argument set.

    Keyword Args:
        includes: Forwarded to the picker's ``pick_random_font_available_on_system``.
        excludes: Forwarded likewise, always extended by "Kinnari" and "KacstOne".
            May be omitted or ``None``.
        **other: Passed to ``get_font_picker``.

    Because of the cache, repeated calls with equal arguments return the font
    picked on the first call. All keyword values must be hashable.
    """
    # `or ()` covers both a missing and an explicit-None `excludes`; the previous
    # `kwargs.get("excludes",)` returned None in that case and the unpacking failed.
    requested_excludes = kwargs.get("excludes") or ()
    kwargs["excludes"] = (*requested_excludes, "Kinnari", "KacstOne")

    font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
    return font_picker.pick_random_font_available_on_system(**project(kwargs, ["includes", "excludes"]))
|
|
|
|
|
|
@lru_cache(maxsize=None)
def pick_colormap() -> ListedColormap:
    """Choose one of five matplotlib colormaps at random.

    The function takes no arguments and is memoised, so the choice is made on
    the first call and reused afterwards.
    """
    candidates = ["viridis", "plasma", "inferno", "magma", "cividis"]
    return plt.get_cmap(rnd.choice(candidates))
|
|
|
|
|
|
class RandomPlot(RandomContentRectangle):
    """Content rectangle that renders a randomly chosen matplotlib plot.

    All choices made through the ``random`` API use the module-level seeded
    generator ``rnd`` (before, some used the global ``random`` module and were
    therefore not reproducible from the logged seed). The plotted data still
    comes from ``np.random``, which this class does not seed.
    """

    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)

        self.cmap = pick_colormap()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_plot(self, rectangle: Rectangle):
        """Draw a random plot whose type suits the aspect of ``rectangle`` into ``self.content``."""
        if is_square_like(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_bar_plot,
                    self.generate_random_scatter_plot,
                    self.generate_random_histogram,
                    self.generate_random_pie_chart,
                ]
            )
        elif is_wide(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_histogram,
                    self.generate_random_bar_plot,
                ]
            )
        elif is_tall(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_bar_plot,
                    self.generate_random_histogram,
                ]
            )
        else:
            plt_fn = self.generate_random_scatter_plot

        plt_fn(rectangle)

    def generate_random_bar_plot(self, rectangle: Rectangle):
        """Bar plot of five random integer heights at sorted random integer positions."""
        x = sorted(np.random.randint(low=1, high=11, size=5))
        y = np.random.randint(low=1, high=11, size=5)
        bar_fn = partial(
            plt.bar,
            log=rnd.choice([True, False]),
        )
        self.__generate_random_plot(bar_fn, rectangle, x, y)

    def generate_random_line_plot(self, rectangle: Rectangle):
        """Line plot of a randomly chosen elementary function on [0, 10]."""
        f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])

        # NOTE(review): the interval starts at 0, where np.log evaluates to -inf.
        x = np.linspace(0, 10, 100)
        y = f(x)

        self.__generate_random_plot(plt.plot, rectangle, x, y)

    def generate_random_scatter_plot(self, rectangle: Rectangle):
        """Scatter plot of 100 points with normally distributed coordinates."""
        x = np.random.normal(size=100)
        y = np.random.normal(size=100)
        self.__generate_random_plot(plt.scatter, rectangle, x, y)

    def generate_random_histogram(self, rectangle: Rectangle):
        """Histogram of 100 normally distributed samples with randomised display options."""
        x = np.random.normal(size=100)
        hist_fn = partial(
            plt.hist,
            orientation=rnd.choice(["horizontal", "vertical"]),
            histtype=rnd.choice(["bar", "barstacked", "step", "stepfilled"]),
            log=rnd.choice([True, False]),
            stacked=rnd.choice([True, False]),
            density=rnd.choice([True, False]),
            cumulative=rnd.choice([True, False]),
        )
        # The second positional argument of plt.hist is the number of bins.
        self.__generate_random_plot(hist_fn, rectangle, x, rnd.randint(5, 20))

    def generate_random_pie_chart(self, rectangle: Rectangle):
        """Pie chart with 3 to 7 random wedges, each slightly exploded."""
        n = rnd.randint(3, 7)
        x = np.random.uniform(size=n)
        pie_fn = partial(
            plt.pie,
            shadow=True,
            startangle=90,
            pctdistance=0.85,
            labeldistance=1.1,
            colors=self.cmap(np.linspace(0, 1, 10)),
        )
        self.__generate_random_plot(
            pie_fn,
            rectangle,
            x,
            # The second positional argument of plt.pie is `explode`.
            np.random.uniform(0, 0.1, size=n),
            # "a" matches none of the generated keys, so the pie receives no extra kwargs.
            plot_kwargs=self.generate_plot_kwargs(keywords=["a"]),
        )

    def generate_plot_kwargs(self, keywords=None):
        """Return random colour and line-style kwargs, optionally restricted to ``keywords``."""
        kwargs = {
            "color": rnd.choice(self.cmap.colors),
            "linestyle": rnd.choice(["-", "--", "-.", ":"]),
            "linewidth": rnd.uniform(1, 4),
        }

        return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords}

    def __generate_random_plot(self, plot_fn, rectangle: Rectangle, x, y, plot_kwargs=None):
        """Run ``plot_fn(x, y, **plot_kwargs)`` on a new figure and store the rendering.

        The figure is sized to the rectangle (assuming 100 pixels per inch), randomly
        decorated or stripped of decorations, rendered via ``dump_plt_to_image`` and
        layered onto ``self.content``.
        """
        plot_kwargs = self.generate_plot_kwargs() if plot_kwargs is None else plot_kwargs

        fig, ax = plt.subplots()
        fig.set_size_inches(rectangle.width / 100, rectangle.height / 100)
        fig.tight_layout(pad=0)

        plot_fn(x, y, **plot_kwargs)
        ax.set_facecolor("none")

        if maybe():
            ax.set_title("Figure Title")

        # disable axes at random
        if maybe():
            ax.set_xticks([])
        if maybe():
            ax.set_yticks([])
        if maybe():
            ax.set_xticklabels([])
        if maybe():
            ax.set_yticklabels([])
        if maybe():
            ax.set_xlabel("")
        if maybe():
            ax.set_ylabel("")
        if maybe():
            ax.set_title("")
        if maybe():
            ax.set_frame_on(False)

        # remove spines at random
        if maybe():
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)

        image = dump_plt_to_image(rectangle)
        assert image.mode == "RGBA"

        self.content = image if not self.content else superimpose_texture_with_transparency(self.content, image)
|
|
|
|
|
|
def maybe():
    """Return ``True`` when a draw from ``rnd.random()`` exceeds 0.9, else ``False``."""
    threshold = 0.9
    return threshold < rnd.random()
|
|
|
|
|
|
def probably():
    """Return ``True`` when a draw from ``rnd.random()`` exceeds 0.4, else ``False``."""
    threshold = 0.4
    return threshold < rnd.random()
|
|
|
|
|
|
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    """Create a ``TextBlock`` covering ``rectangle`` and fill it with random text.

    Existing content of a ``ContentRectangle`` input is carried over before the
    text is generated.
    """
    coords = rectangle.coords
    body_font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif"),
        excludes=("bold", "mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, font=body_font, font_size=30)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None

    block.generate_random_text(rectangle, n_sentences)
    return block
|
|
|
|
|
|
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    """Create a caption block whose text starts with "Fig <n>", n drawn from 1..20."""
    figure_number = rnd.randint(1, 20)
    return generate_random_caption(rectangle, f"Fig {figure_number}")
|
|
|
|
|
|
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    """Create a caption block whose text starts with "Tabl <n>", n drawn from 1..20."""
    table_number = rnd.randint(1, 20)
    return generate_random_caption(rectangle, f"Tabl {table_number}")
|
|
|
|
|
|
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    """Create a ``TextBlock`` covering ``rectangle`` filled with a random caption.

    The text comes from a ``CaptionGenerator`` initialised with ``caption_start``.
    Existing content of a ``ContentRectangle`` input is carried over first.
    """
    coords = rectangle.coords
    caption_generator = CaptionGenerator(caption_start=caption_start)
    caption_font = pick_random_font_available_on_system(
        includes=("italic",),
        excludes=("bold", "mono"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, text_generator=caption_generator, font=caption_font, font_size=100)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None

    block.generate_random_text(rectangle, n_sentences)
    return block
|
|
|
|
|
|
def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
    """Create a ``TextBlock`` covering ``rectangle`` that shows the given ``text``.

    Existing content of a ``ContentRectangle`` input is carried over before the
    text is put on top.
    """
    coords = rectangle.coords
    heading_font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif", "bold"),
        excludes=("mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    block = TextBlock(*coords, font=heading_font, font_size=30)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None

    block.put_text(text, rectangle)
    return block
|
|
|
|
|
|
def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
    """Draw ``lines`` one below the other onto a new transparent RGBA image.

    Args:
        lines: Text lines, top to bottom.
        rectangle: Provides ``width`` and ``height`` of the image.
        font: Font to draw with; a falsy value selects a random monospace font.

    Returns:
        The image. Text is black and starts at the left edge; the line pitch is
        the rendered height of the first line. For an empty ``lines`` the blank
        image is returned (before, this case failed when measuring the first line).
    """
    font = font or pick_random_mono_space_font_available_on_system()

    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
    if not lines:
        return image

    draw = ImageDraw.Draw(image)
    line_height = draw.textsize(first(lines), font=font)[1]

    for line_number, line in enumerate(lines):
        draw.text((0, line_number * line_height), line, font=font, fill=(0, 0, 0, 255))

    return image
|
|
|
|
|
|
class LineFormatter(abc.ABC):
    """Common base class of the line formatters; declares no members of its own."""
|
|
|
|
|
|
class IdentityLineFormatter(LineFormatter):
    """Line formatter that passes its input through unchanged."""

    def __init__(self):
        """Nothing to set up."""

    def __call__(self, lines, last_full):
        """Return ``(lines, last_full)`` exactly as received."""
        unchanged = (lines, last_full)
        return unchanged
|
|
|
|
|
|
class ParagraphLineFormatter(LineFormatter):
    """Line formatter that randomly shortens lines to imitate paragraph endings.

    Args:
        blank_line_percentage: Probability with which the current line is
            truncated, provided the previous line was left intact. ``None``
            draws a value from ``rnd.uniform(0, 0.5)``.
    """

    def __init__(self, blank_line_percentage=None):
        # `is None` instead of `or`: an explicit 0 (never truncate) is a valid
        # setting and must not be replaced by a random value.
        if blank_line_percentage is None:
            blank_line_percentage = rnd.uniform(0, 0.5)
        self.blank_line_percentage = blank_line_percentage

    def __call__(self, lines, last_full):
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        """Format the first of ``lines`` and move it to the back.

        Args:
            lines: Lines still to be processed, the current one first.
            last_full: Whether the previously processed line was left intact.
                A line is only truncated if this is true.

        Returns:
            The re-ordered lines and a flag telling whether the line processed
            in this call was left intact.
        """

        def truncate_current_line():
            return rnd.random() < self.blank_line_percentage and last_full

        # This is meant to be read from the bottom up.
        current_line_shall_not_be_a_full_line = truncate_current_line()
        line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
        format_current_line = compose(line_formatter, first)
        # NOTE(review): presumably rconj(xs, x) appends x to xs; verify against cv_analysis.utils.
        move_current_line_to_back = star(rconj)
        split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
        split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
            split_first_line_from_lines_and_format_the_former,
            move_current_line_to_back,
        )
        current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
        # Start reading here and move up.
        return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line

    def format_line(self, line, full=True):
        """Return ``(line, full)``, with ``line`` truncated when ``full`` is false."""
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        """Shorten ``line`` to at most four of its words.

        Keeps up to ``n`` (random, 0..4) words directly before the last word,
        removes all periods and appends ".\\n". An empty result stays empty.
        """
        n_trailing_words = rnd.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line
|
|
|
|
|
|
class TextBlockGenerator(abc.ABC):
    """Common base class of the text block generators; declares no members of its own."""
|
|
|
|
|
|
class ParagraphGenerator(TextBlockGenerator):
    """Generates random text lines formatted by a ``ParagraphLineFormatter``."""

    def __init__(self):
        # The truncation probability is drawn once per generator.
        truncation_probability = rnd.uniform(0, 0.5)
        self.line_formatter = ParagraphLineFormatter(blank_line_percentage=truncation_probability)

    def __call__(self, rectangle, n_sentences):
        """Shorthand for ``generate_paragraph``."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines for ``rectangle``, run through the line formatter."""
        return generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
|
|
|
|
|
|
class CaptionGenerator(TextBlockGenerator):
    """Generates random text lines whose first line carries a caption prefix.

    Args:
        caption_start: Prefix such as "Fig 3". A falsy value selects
            "Fig <n>" with n drawn from 1..20.
    """

    def __init__(self, caption_start=None):
        self.line_formatter = IdentityLineFormatter()
        if not caption_start:
            caption_start = f"Fig {rnd.randint(1, 20)}"
        self.caption_start = caption_start

    def __call__(self, rectangle, n_sentences):
        """Shorthand for ``generate_paragraph``."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines with "<caption_start>.: " prepended to the first one."""
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        opening_line = f"{self.caption_start}.: {first(lines)}"
        remaining_lines = rest(lines)
        return conj(opening_line, remaining_lines)
|
|
|
|
|
|
class TextBlock(ContentRectangle):
    """Content rectangle that holds rendered text.

    Args:
        x1, y1, x2, y2: Coordinates passed on to ``ContentRectangle``.
        text_generator: Callable ``(rectangle, n_sentences) -> lines``; a falsy
            value selects a new ``ParagraphGenerator``.
        font: Font used for drawing; a falsy value selects PIL's default font.
        font_size: Accepted but not used by this class.
    """

    def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
        super().__init__(x1, y1, x2, y2)
        # pick_random_font_available_on_system(size=font_size)
        self.font = font if font else ImageFont.load_default()
        self.text_generator = text_generator if text_generator else ParagraphGenerator()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
        """Render text from the text generator and layer it onto the content; returns ``self``."""
        generated_lines = self.text_generator(rectangle, n_sentences)
        rendered = write_lines_to_image(generated_lines, rectangle, self.font)
        return self.__put_content(rendered)

    def put_text(self, text: str, rectangle: Rectangle):
        """Render ``text`` on a single line and layer it onto the content; returns ``self``."""
        text_width, text_height = self.font.getsize(text)
        overflows = text_width > rectangle.width or text_height > rectangle.height

        canvas = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))
        if overflows:
            # NOTE(review): the still empty canvas is resized before the text is drawn,
            # so over-long text looks to be cut off rather than scaled down - confirm intent.
            canvas = canvas.resize((int(rectangle.width * 0.9), text_height))

        ImageDraw.Draw(canvas).text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
        return self.__put_content(canvas)

    def __put_content(self, image: Image.Image):
        """Set ``image`` as content, or layer it onto the content already present."""
        if self.content:
            self.content = superimpose_texture_with_transparency(self.content, image)
        else:
            self.content = image
        assert self.content.mode == "RGBA"
        return self
|
|
|
|
|
|
def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
    """Generate random text, wrap it into lines and run the line formatter over every line.

    Args:
        rectangle: Its ``width`` is used as the wrap width passed to ``textwrap.wrap``.
        line_formatter: Callable ``(lines, last_full) -> (lines, current_full)`` that
            formats the first line and moves it to the back of the list.
        n_sentences: Number of sentences of the generated paragraph.

    Returns:
        The lines after one formatter pass per line.
    """
    text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
    unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)
    # each iteration of the line formatter function formats one more line and adds it to the back of the list
    states = iterate(star(line_formatter), (unformatted_lines, True))
    # hence do as many iterations as there are lines in the rectangle; iterate() yields the
    # untouched initial state first, so n formatter passes require taking n + 1 states
    # (taking only n left the last line unformatted and at the front of the result)
    n_states = len(unformatted_lines) + 1
    # and then take the lines from the last iteration of the function
    formatted_lines, _ = last(take(n_states, states))

    return formatted_lines
|
|
|
|
|
|
def paste_content(page, content_box: ContentRectangle):
    """Paste the box's RGBA content onto ``page`` at the box's top-left corner.

    The content serves as its own paste mask. ``page`` is modified in place and
    returned.
    """
    # assert page.mode == "RGB"
    overlay = content_box.content
    assert overlay.mode == "RGBA"
    top_left = (content_box.x1, content_box.y1)
    page.paste(overlay, top_left, overlay)
    return page
|
|
|
|
|
|
def paste_contents(page, contents: Iterable[ContentRectangle]):
    """Return a deep copy of ``page`` with all ``contents`` pasted onto it in order."""
    canvas = deepcopy(page)
    for content_box in contents:
        # paste_content() works in place on the copy; its return value is not needed.
        paste_content(canvas, content_box)
    return canvas
|
|
|
|
|
|
class PagePartitioner(abc.ABC):
    """Base class for strategies that split a page into content boxes.

    Subclasses implement ``generate_content_boxes``; calling an instance with a
    page returns the flattened list of boxes inside the page margins.
    """

    def __init__(self):
        # Outer page margins as fractions of the page width / height.
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1

        # Settings for the recursive subdivision.
        self.recursive_margin_percentage = 0.007
        self.max_recursion_depth = 3
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        """Partition the area of ``page`` that remains after removing the margins."""
        page_width, page_height = page.width, page.height
        left = int(page_width * self.left_margin_percentage)
        top = int(page_height * self.top_margin_percentage)
        right = page_width - int(page_width * self.right_margin_percentage)
        bottom = page_height - int(page_height * self.bottom_margin_percentage)

        printable_area = Rectangle(left, top, right, bottom)
        # boxes = self.drop_small_boxes(boxes, *page.size)
        # boxes = merge_related_rectangles(boxes)
        # boxes = list(boxes)
        return lflatten(self.generate_content_boxes(printable_area))

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield boxes (or nested iterables of boxes) that subdivide ``box``."""
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        """Split ``box`` in two along ``axis`` ("x" or "y").

        ``split_percentage`` is the position of the split relative to the box's
        extent along that axis, measured from ``x1`` or ``y1`` respectively.
        """
        assert axis in ["x", "y"]

        if axis == "x":
            edge_anchor_point, edge_length = box.x1, box.width
        else:
            edge_anchor_point, edge_length = box.y1, box.height

        split_coordinate = split_percentage * edge_length + edge_anchor_point
        return get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)

    def recurse(self, depth):
        """Decide at random whether to subdivide further at the given ``depth``."""
        draw = rnd.random()
        return draw <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        """Initial probability multiplied by ``(1 - decay)`` once per level of ``depth``."""
        retained_share = (1 - self.recursion_probability_decay) ** depth
        return self.initial_recursion_probability * retained_share
|
|
|
|
|
|
class RandomPagePartitioner(PagePartitioner):
    """Partitioner that splits along a random axis at a random position."""

    def __init__(self):
        super().__init__()

    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield ``box`` at maximum depth; otherwise its two children or their subdivisions.

        The yielded items can be boxes, tuples of boxes or nested generators; the
        caller is expected to flatten them.
        """
        if depth >= self.max_recursion_depth:
            yield box
            return

        split_axis = rnd.choice(["x", "y"])
        split_position = rnd.uniform(0.3, 0.7)
        children = self.generate_child_boxes(box, axis=split_axis, split_percentage=split_position)

        if not self.recurse(depth):
            yield children
            return

        for child in children:
            yield self.generate_content_boxes(child, depth + 1)
|
|
|
|
|
|
class TwoColumnPagePartitioner(PagePartitioner):
    """Partitioner that first splits the page into two columns, then splits those horizontally."""

    def __init__(self):
        super().__init__()
        # self.recursive_margin_percentage = 0.1
        # self.left_margin_percentage = 0.1
        # self.right_margin_percentage = 0.1
        self.max_recursion_depth = 3

    def generate_content_boxes(self, box: Rectangle, depth=0):
        """Yield ``box`` at maximum depth, otherwise generators for its two children.

        Depth 0 splits along "x" in the middle; deeper levels split along "y" at
        30 % or 70 % of the box's height, chosen at random.
        """
        if depth >= self.max_recursion_depth:
            yield box
            return

        if depth == 0:
            axis, split_percentage = "x", 0.5
        else:
            axis, split_percentage = "y", rnd.choice([0.3, 0.7])

        children = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)
        for child in children:
            yield self.generate_content_boxes(child, depth + 1)
|
|
|
|
|
|
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    """Split ``box`` at ``split_coordinate`` along ``axis`` into two rectangles.

    Every lower bound (x1, y1 and the second child's split edge) is multiplied by
    ``1 + margin_percentage`` and every upper bound (x2, y2 and the first child's
    split edge) by ``1 - margin_percentage``.
    """
    assert axis in ["x", "y"]

    grow = 1 + margin_percentage
    shrink = 1 - margin_percentage

    left, top = box.x1 * grow, box.y1 * grow
    right, bottom = box.x2 * shrink, box.y2 * shrink
    split_as_upper_bound = split_coordinate * shrink
    split_as_lower_bound = split_coordinate * grow

    if axis == "x":
        first_child = Rectangle(left, top, split_as_upper_bound, bottom)
        second_child = Rectangle(split_as_lower_bound, top, right, bottom)
    else:
        first_child = Rectangle(left, top, right, split_as_upper_bound)
        second_child = Rectangle(left, split_as_lower_bound, right, bottom)

    return first_child, second_child
|
|
|
|
|
|
def drop_small_boxes(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]:
    """Remove every box that is too narrow or too low relative to the page.

    A box is dropped if its width is below ``page_width * min_percentage`` or its
    height is below ``page_height * min_percentage``.
    """
    width_threshold = page_width * min_percentage
    height_threshold = page_height * min_percentage

    def too_small(candidate: Rectangle):
        return candidate.width < width_threshold or candidate.height < height_threshold

    return lremove(too_small, boxes)
|
|
|
|
|
|
def draw_boxes(page: Image, boxes: Iterable[Rectangle]):
|
|
# page = draw_rectangles(page, boxes, filled=False, annotate=True)
|
|
show_image(page, backend="pil")
|