Matthias Bisping 43688d0f0b Add filtering of interior tables in cell yielding
Interior tables are only an implementation detail and should not produce
target boxes. Instead, only their cells are now yielded.
2023-02-14 16:19:10 +01:00

320 lines
10 KiB
Python

import random
from copy import deepcopy
from enum import Enum
from functools import lru_cache, partial
from math import sqrt
from typing import List, Iterable
from PIL import Image
from funcy import chunks, mapcat, repeatedly
from loguru import logger
from cv_analysis.utils.geometric import is_square_like
from cv_analysis.utils.image_operations import superimpose
from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import area
from synthesis.random import rnd, possibly
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.plot import pick_colormap
from synthesis.segment.random_content_rectangle import RandomContentRectangle
from synthesis.segment.recursive_content_rectangle import RecursiveContentRectangle
from synthesis.segment.segments import generate_random_plot, generate_recursive_random_table, generate_text_block
from synthesis.segment.table.cell import Cell
from synthesis.text.text import generate_random_words, generate_random_number
class RecursiveRandomTable(RandomContentRectangle, RecursiveContentRectangle):
    """A table with a random grid whose cells hold text, plots or nested tables.

    The grid dimensions, background colour and (unless given) the layout are
    drawn in ``__init__``. The cells themselves are only created when
    ``generate_random_table`` is called; afterwards they are available through
    ``cells`` (table-relative coordinates) and ``child_boxes`` (shifted copies).
    """

    def __init__(self, x1, y1, x2, y2, border_width=1, layout: str = None, double_rule=False):
        """A table with a random number of rows and columns, and random content in each cell.

        Args:
            x1: x-coordinate of the top-left corner
            y1: y-coordinate of the top-left corner
            x2: x-coordinate of the bottom-right corner
            y2: y-coordinate of the bottom-right corner
            border_width: width of the table border; in this class it is only used to derive the
                space reserved for a double rule (three times this width at top and bottom)
            layout: layout of the table, either "horizontal", "vertical", "closed", or "open";
                a random layout matching the grid dimensions is picked when omitted
            double_rule: whether to use double rules as the top and bottom rules
        """
        assert layout in [None, "horizontal", "vertical", "closed", "open"]

        super().__init__(x1, y1, x2, y2)

        self.double_rule = double_rule
        self.double_rule_width = (3 * border_width) if self.double_rule else 0

        # At most one column per 100 units of width, but always at least one.
        self.n_columns = rnd.randint(1, max(self.width // 100, 1))
        # The row height is itself random (17..100); the double rule space is not available to rows.
        self.n_rows = rnd.randint(1, max((self.height - 2 * self.double_rule_width) // rnd.randint(17, 100), 1))
        self.cell_size = (self.width / self.n_columns, (self.height - 2 * self.double_rule_width) / self.n_rows)

        # Fully transparent canvas the borders (and optionally the cell contents) are drawn on.
        self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))
        self.background_color = get_random_background_color()

        self.layout = layout or self.pick_random_layout()
        logger.debug(f"Layout: {self.layout}")

        self.__cells = []

    @property
    def cells(self):
        """The cells collected by ``generate_random_table`` (empty before it was called)."""
        return self.__cells

    @property
    def child_boxes(self):
        """Yield a shifted deep copy of every cell.

        Cells are created with coordinates relative to the table (see ``generate_cell``); each copy
        is shifted by the table's ``(x1, y1)``. The stored cells themselves are left untouched.
        """
        for cell in self.cells:
            # TODO: this is not very clean
            cell = deepcopy(cell)
            cell.shift(self.x1, self.y1)
            yield cell

    def has_child_boxes(self):
        """Always True for a table."""
        return True

    def pick_random_layout(self):
        """Pick a layout that is compatible with the number of rows and columns.

        A 1x1 table is always "closed", a single column is "vertical" or "closed", a single row is
        "horizontal" or "closed"; any other grid may use all four layouts.
        """
        if self.n_columns == 1 and self.n_rows == 1:
            return "closed"
        if self.n_columns == 1:
            return rnd.choice(["vertical", "closed"])
        if self.n_rows == 1:
            return rnd.choice(["horizontal", "closed"])
        return rnd.choice(["closed", "horizontal", "vertical", "open"])

    def generate_random_table(self, draw_cell_content=False):
        """Generate the cells of the table, fill them with content and draw the borders.

        The generated cells are appended to ``self.cells``.

        Args:
            draw_cell_content: Whether to draw the content of each cell. If False, only the table border is drawn. Cells
                can be accessed and drawn later.

        Returns:
            None
        """
        cells = self.generate_table()
        cells = list(self.fill_cells_with_content(cells))
        # Materialising the generator also triggers the table border / rule drawing at its end.
        cells = list(self.draw_cell_borders(cells))

        # TODO: This is not very clean.
        if draw_cell_content:
            self.content = paste_contents(self.content, cells)
        assert self.content.mode == "RGBA"

        self.__cells.extend(cells)

    def fill_cells_with_content(self, cells):
        """Lazily yield the result of ``build_cell`` for every given cell."""
        yield from map(self.build_cell, cells)

    def build_cell(self, cell):
        """Fill a cell with content chosen by its size (see ``Size`` and ``get_size``).

        Raises:
            ValueError: if the cell size matches none of the size predicates.
        """
        if self.__is_a_small_cell(cell):
            cell = self.build_small_cell(cell)
        elif self.__is_a_medium_sized_cell(cell):
            cell = self.build_medium_sized_cell(cell)
        elif self.__is_a_large_cell(cell):
            cell = self.build_large_cell(cell)
        else:
            raise ValueError(f"Invalid cell size: {get_size(cell)}")

        assert cell.content.mode == "RGBA"

        return cell

    def __is_a_small_cell(self, cell):
        return get_size(cell) <= Size.SMALL.value

    def __is_a_medium_sized_cell(self, cell):
        # Only meaningful after the small check failed (see build_cell).
        return get_size(cell) <= Size.MEDIUM.value

    def __is_a_large_cell(self, cell):
        return get_size(cell) > Size.MEDIUM.value

    def build_small_cell(self, cell):
        """Fill a small cell with a few random words or a number with an optional unit suffix."""
        words = generate_random_words(1, 3) if possibly() else ""
        if words:
            content = words
        else:
            # Also reached when the word generator returned something falsy.
            number = generate_random_number()
            unit = (" " + rnd.choice(["$", "£", "%", "EUR", "USD", "CAD", "ADA"])) if possibly() else ""
            content = number + unit

        return generate_text_block(cell, content)

    def build_medium_sized_cell(self, cell):
        """Fill a medium sized cell with either a random plot or a nested table."""
        choice = rnd.choice(["plot", "recurse"])

        if choice == "plot":
            return generate_random_plot(cell)
        return self._build_nested_table(cell)

    def build_large_cell(self, cell):
        """Fill a large cell with a plot if that was drawn and the cell is square-like, else with a nested table."""
        choice = rnd.choice(["plot", "recurse"])

        logger.debug(f"Generating {choice} {get_size(cell):.0f} {get_size_class(cell).name}")

        if choice == "plot" and is_square_like(cell):
            return generate_random_plot(cell)

        logger.debug(f"recurse {get_size(cell):.0f} {get_size_class(cell).name}")
        return self._build_nested_table(cell)

    def _build_nested_table(self, cell):
        """Build a nested table inside the cell with a randomly picked non-closed layout."""
        return generate_recursive_random_table(
            cell,
            border_width=1,
            # Use the project's random source (not the global `random` module) like every other
            # random decision in this class, so a seeded run stays reproducible.
            layout=rnd.choice(["open", "horizontal", "vertical"]),
            double_rule=False,
        )

    def draw_cell_borders(self, cells: List[ContentRectangle]):
        """Draw the inner borders of all cells and yield the cells unchanged.

        The cells are expected in column-major order (as produced by ``generate_table``). Once all
        cells were yielded, the outer border ("closed" layout) and the double rule are drawn; since
        this is a generator that only happens when it is consumed completely.
        """
        columns = chunks(self.n_rows, cells)

        for col_idx, column in enumerate(columns):
            for row_index, cell in enumerate(column):
                self.draw_cell(cell, col_idx, row_index)
                yield cell

        if self.layout == "closed":
            self.draw_table_borders()

        if self.double_rule:
            self.draw_table_rule()

    def draw_cell(self, cell, col_idx, row_index):
        """Draw the position dependent borders for a single cell."""
        # TODO: Refactor
        # A fresh Cell sharing the original's content object is used for drawing.
        # NOTE(review): presumably the border methods draw onto that shared content in place - confirm.
        c = Cell(*cell.coords, self.background_color)
        c.content = cell.content
        self.draw_edges_based_on_position(c, col_idx, row_index)

    def draw_edges_based_on_position(self, cell: Cell, col_idx, row_index):
        """Draw the edges of a cell based on its position in the table."""
        # The last column gets no right border and the last row no bottom border.
        if col_idx < self.n_columns - 1:
            cell.draw_right_border()

        if row_index < self.n_rows - 1:
            cell.draw_bottom_border()

    def draw_table_rule(self):
        """Draw the double rule: top/bottom borders on the table box and on a vertically inset box."""
        # TODO: Refactor
        c1 = Cell(*self.coords)
        c1.draw_top_border(width=1)
        c1.draw_bottom_border(width=1)

        x1, y1, x2, y2 = self.coords
        c2 = Cell(x1, y1 + self.double_rule_width, x2, y2 - self.double_rule_width)
        c2.draw_top_border(width=1)
        c2.draw_bottom_border(width=1)

        c = superimpose(c1.content, c2.content)
        self.content = superimpose(c, self.content)

    def draw_table_borders(self):
        """Draw the border around the whole table onto the table content.

        This must stay a regular method: a ``yield`` in here would turn it into a generator
        function whose body never runs when it is merely called, as ``draw_cell_borders`` does.
        """
        # TODO: Refactor
        c = Cell(*self.coords, self.background_color)
        c.content = self.content
        c.draw()

    def generate_table(self) -> Iterable[ContentRectangle]:
        """Lazily yield all cells of the table, column by column."""
        yield from mapcat(self.generate_column, range(self.n_columns))

    def generate_column(self, column_index) -> Iterable[ContentRectangle]:
        """Lazily yield the cells of one column from the first to the last row."""
        logger.trace(f"Generating column {column_index}.")
        generate_cell_for_row_index = partial(self.generate_cell, column_index)
        yield from map(generate_cell_for_row_index, range(self.n_rows))

    def generate_cell(self, column_index, row_index) -> ContentRectangle:
        """Create the empty cell at the given grid position, in table-relative coordinates."""
        w, h = self.cell_size
        # Rows start below the space reserved for the top double rule.
        x1, y1 = (column_index * w), (row_index * h) + self.double_rule_width
        x2, y2 = x1 + w, y1 + h
        logger.trace(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
        return Cell(x1, y1, x2, y2, self.background_color)

    def generate_column_names(self):
        """Return an iterator producing one random column name per column."""
        return repeatedly(self.generate_column_name, self.n_columns)

    def generate_column_name(self):
        """Return a random column name made of one to three random words."""
        return generate_random_words(1, 3)
@lru_cache(maxsize=None)
def get_random_background_color():
    """Return an RGBA background colour complementing a picked colormap.

    The RGB part comes from ``get_random_color_complementing_color_map``; the
    alpha value is drawn from 100..210. Because the function takes no
    arguments and is wrapped in ``lru_cache``, the colour is computed on the
    first call only and every later call returns that same tuple.
    """
    rgb = get_random_color_complementing_color_map(pick_colormap())
    alpha = rnd.randint(100, 210)
    return (*rgb, alpha)
def get_random_color_complementing_color_map(colormap):
    """Return the complement of the colour the colormap yields at position 0.2.

    Args:
        colormap: callable mapping a float to a sequence whose first three
            items are the red, green and blue components as floats
            (presumably in the range 0..1 - they are scaled by 255 here).

    Returns:
        A tuple of three ints: the complementary (r, g, b) colour.
    """
    # Scale the float components to integers, truncating towards zero.
    red, green, blue = (int(255 * component) for component in colormap(0.2)[:3])

    # Complement as in https://stackoverflow.com/a/40234924: mirror every
    # channel around the sum of the smallest and the largest channel.
    pivot = min(red, green, blue) + max(red, green, blue)
    return tuple(pivot - channel for channel in (red, green, blue))
def paste_contents(page, contents: Iterable[ContentRectangle]):
    """Paste every content box onto a deep copy of the page and return that copy.

    The page passed in is not modified.
    """
    canvas = deepcopy(page)
    for content_box in contents:
        paste_content(canvas, content_box)
    return canvas
def paste_content(page, content_box: ContentRectangle):
    """Paste the content of a box onto the page at the box's top-left corner.

    The page is modified in place and returned. The content must be an RGBA
    image; it is passed to ``page.paste`` both as the source and as the third
    (mask) argument.
    """
    image = content_box.content
    assert image.mode == "RGBA"

    top_left = (content_box.x1, content_box.y1)
    page.paste(image, top_left, image)
    return page
def get_size_class(rectangle: Rectangle):
    """Map the size of a rectangle (see ``get_size``) to a ``Size`` member.

    Sizes below ``Size.SMALL`` are SMALL, sizes below ``Size.LARGE`` are
    MEDIUM, everything else is LARGE.

    NOTE(review): ``RecursiveRandomTable.build_cell`` dispatches on different
    bounds (``<= Size.SMALL`` and ``<= Size.MEDIUM``), so the class reported
    here can differ from the builder that was actually used - confirm which
    thresholds are intended.
    """
    size = get_size(rectangle)

    if size < Size.SMALL.value:
        return Size.SMALL
    if size < Size.LARGE.value:
        return Size.MEDIUM
    return Size.LARGE
def get_size(rectangle: Rectangle):
    """Return the square root of ``area(rectangle)``, used as a scalar measure of its size."""
    return sqrt(area(rectangle))
class Size(Enum):
    """Size thresholds that values returned by ``get_size`` are compared against."""

    # FIXME: this has to scale with the DPI
    SMALL = 120
    MEDIUM = 180
    LARGE = 300