[WIP] recursive random table
This commit is contained in:
parent
4d11a157e5
commit
893622a73e
114
test/fixtures/page_generation/page.py
vendored
114
test/fixtures/page_generation/page.py
vendored
@ -1,26 +1,27 @@
|
||||
import io
|
||||
import itertools
|
||||
import os
|
||||
import random
|
||||
import string
|
||||
import textwrap
|
||||
from functools import lru_cache
|
||||
from functools import lru_cache, partial
|
||||
from pathlib import Path
|
||||
from typing import Tuple, Union, Iterable, List
|
||||
|
||||
import albumentations as A
|
||||
import cv2 as cv
|
||||
import loguru
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from PIL import Image, ImageOps, ImageFont, ImageDraw
|
||||
from PIL.Image import Transpose
|
||||
from faker import Faker
|
||||
from loguru import logger
|
||||
from matplotlib import pyplot as plt
|
||||
from tabulate import tabulate
|
||||
|
||||
from cv_analysis.table_parsing import isolate_vertical_and_horizontal_components
|
||||
from cv_analysis.utils import star, rconj
|
||||
from cv_analysis.utils.common import normalize_to_gray_scale
|
||||
from cv_analysis.utils.merging import merge_related_rectangles
|
||||
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
||||
|
||||
@ -97,7 +98,8 @@ from funcy import (
|
||||
lfilter,
|
||||
lzip,
|
||||
keep,
|
||||
lkeep,
|
||||
repeatedly,
|
||||
mapcat,
|
||||
)
|
||||
|
||||
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
|
||||
@ -365,6 +367,9 @@ class ContentRectangle(Rectangle):
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
self.content = content
|
||||
|
||||
def __repr__(self):
    """Return a debug string with the class name, the four corner coordinates and the content."""
    description = f"{self.__class__.__name__}({self.x1}, {self.y1}, {self.x2}, {self.y2}, content={self.content})"
    return description
|
||||
|
||||
|
||||
class ContentGenerator:
|
||||
def __init__(self):
|
||||
@ -380,7 +385,7 @@ class ContentGenerator:
|
||||
char_boxes = lfilter(is_square_like, char_boxes)
|
||||
text_boxes = merge_related_rectangles(text_boxes)
|
||||
|
||||
text_boxes = lmap(generate_random_text_block, text_boxes)
|
||||
text_boxes = lmap(generate_recursive_random_table, text_boxes)
|
||||
plots = lmap(generate_random_table, every_nth(2, char_boxes))
|
||||
tables = lmap(generate_random_table, every_nth(2, char_boxes[1:]))
|
||||
|
||||
@ -411,6 +416,12 @@ def generate_random_table(rectangle: Rectangle) -> ContentRectangle:
|
||||
return block
|
||||
|
||||
|
||||
def generate_recursive_random_table(rectangle: Rectangle) -> ContentRectangle:
    """Build a RecursiveRandomTable covering `rectangle` and render its content.

    The table is constructed from the rectangle's coordinates, its
    `generate_random_table()` method is invoked, and the table is returned.
    """
    table = RecursiveRandomTable(*rectangle.coords)
    table.generate_random_table()
    return table
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def get_random_seed():
    """Return a random integer in [0, 2**32 - 1].

    The function takes no arguments and is wrapped in `lru_cache`, so the value
    is drawn on the first call and the same value is returned afterwards.
    """
    largest_seed = 2**32 - 1
    return random.randint(0, largest_seed)
|
||||
@ -423,6 +434,66 @@ class RandomContentRectangle(ContentRectangle):
|
||||
self.random = random.Random(self.seed)
|
||||
|
||||
|
||||
class RecursiveRandomTable(RandomContentRectangle):
    """Rectangle whose content is an image of a cell grid filled with short random texts.

    The grid gets one column per 100 pixels of width and one row per 100 pixels of
    height (at least one of each); all cells share the same size.
    """

    def __init__(self, x1, y1, x2, y2, seed=None):
        """Compute the grid layout and start with a blank white canvas.

        Args:
            x1, y1, x2, y2: Corner coordinates, forwarded to the parent class.
            seed: Forwarded to the parent class.
        """
        super().__init__(x1, y1, x2, y2, seed=seed)
        # At least one row/column, even for rectangles smaller than 100 pixels.
        self.n_columns = max(self.width // 100, 1)
        self.n_rows = max(self.height // 100, 1)
        # Floor division: the grid may be slightly smaller than the rectangle, never larger.
        self.cell_size = (self.width // self.n_columns, self.height // self.n_rows)
        logger.debug(f"RecursiveRandomTable({self.n_columns}, {self.n_rows}, {self.cell_size}) at {self}")
        self.content = Image.new("RGB", (self.width, self.height), (255, 255, 255))

    def generate_random_table(self):
        """Fill every cell with text, paste the cells onto the canvas and draw their borders.

        Afterwards `self.content` is an RGBA image.
        """
        # Materialise the cells: they are needed for pasting and again for the borders.
        cells = list(self.generate_cells_with_content())
        self.content = paste_contents(self.content, cells)
        # The canvas starts as RGB; convert so the content has the same mode as the cell images.
        self.content = self.content.convert("RGBA")
        self.draw_cell_borders(cells)

    def generate_cells_with_content(self):
        """Yield each cell of the grid with a one-sentence random text image as its content."""
        for cell in self.generate_table():
            cell.content = generate_random_text_block(cell, n_sentences=1).content
            assert cell.content.mode == "RGBA"
            yield cell

    def draw_cell_borders(self, cells):
        """Draw a black outline for every cell onto `self.content`."""
        # NOTE(review): cell.coords are offset by (self.x1, self.y1) (see generate_cell), while
        # self.content is created with size (self.width, self.height) - confirm that drawing
        # in this coordinate frame is intended for tables that do not start at the origin.
        draw = ImageDraw.Draw(self.content)
        for cell in cells:
            draw.rectangle(cell.coords, outline=(0, 0, 0))

    def draw_single_cell_borders(self, cell):
        """Draw the outline of a single cell onto `self.content`."""
        draw = ImageDraw.Draw(self.content)
        # NOTE(review): outline colour (1, 1, 0) differs from draw_cell_borders' (0, 0, 0) - confirm.
        draw.rectangle(cell.coords, outline=(1, 1, 0))

    def generate_table(self) -> Iterable[ContentRectangle]:
        """Lazily yield all cells, column by column."""
        for column_index in range(self.n_columns):
            yield from self.generate_column(column_index)

    def generate_column(self, column_index) -> Iterable[ContentRectangle]:
        """Lazily yield the cells of column `column_index`, top row first."""
        logger.debug(f"Generating column {column_index}.")
        for row_index in range(self.n_rows):
            yield self.generate_cell(row_index, column_index)

    def generate_cell(self, row_index, column_index) -> ContentRectangle:
        """Return the (content-less) rectangle of the cell at the given grid position.

        The cell is positioned relative to this table's own (x1, y1) corner.
        """
        w, h = self.cell_size
        x1, y1 = self.x1 + column_index * w, self.y1 + row_index * h
        x2, y2 = x1 + w, y1 + h
        logger.debug(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
        # Internal invariants: every cell must lie inside the table rectangle.
        assert x1 >= self.x1
        assert y1 >= self.y1
        assert x2 <= self.x2
        assert y2 <= self.y2
        return ContentRectangle(x1, y1, x2, y2)

    def generate_column_names(self):
        """Return a lazy, single-use iterator of `n_columns` generated column names."""
        return repeatedly(self.generate_column_name, self.n_columns)

    def generate_column_name(self):
        """Return one to three random words (as produced by `Faker().words`) for a column name.

        The word count is drawn from the instance's seeded generator `self.random` (set up by
        the parent class) instead of the module-level `random`, so it follows the seed.
        The Faker instance itself is not seeded here, so the words are not reproducible.
        """
        n_words = self.random.randint(1, 3)
        return Faker().words(n_words)
|
||||
|
||||
|
||||
class RandomTable(RandomContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, seed=None):
|
||||
super().__init__(x1, y1, x2, y2, seed=seed)
|
||||
@ -436,6 +507,7 @@ class RandomTable(RandomContentRectangle):
|
||||
text_table = self.generate_random_ascii_table(rectangle)
|
||||
table_lines = text_table.split("\n")
|
||||
image = write_lines_to_image(table_lines, rectangle)
|
||||
self.join_lines(image)
|
||||
|
||||
self.content = image
|
||||
|
||||
@ -459,15 +531,21 @@ class RandomTable(RandomContentRectangle):
|
||||
|
||||
return df
|
||||
|
||||
def join_lines(self, table: Image.Image):
    """Extract the vertical and horizontal components of a rendered table image.

    The image is passed through `normalize_image_format_to_array` and
    `normalize_to_gray_scale` before `isolate_vertical_and_horizontal_components`
    is applied. The resulting grid is currently neither stored nor returned.
    """
    table_array = normalize_image_format_to_array(table)
    gray_table = normalize_to_gray_scale(table_array)
    grid = isolate_vertical_and_horizontal_components(gray_table)
    # grid = cv2.bitwise_not(grid)
|
||||
|
||||
def generate_random_ascii_table(self, rectangle: Rectangle):
|
||||
df = self.generate_random_dataframe(rectangle)
|
||||
table_format = random.choice(
|
||||
[
|
||||
"simple",
|
||||
# "simple",
|
||||
"grid",
|
||||
"presto",
|
||||
"psql",
|
||||
"rst",
|
||||
# "presto",
|
||||
# "psql",
|
||||
# "rst",
|
||||
]
|
||||
)
|
||||
text_table = tabulate(df, headers="keys", tablefmt=table_format)
|
||||
@ -506,7 +584,7 @@ class RandomFontPicker:
|
||||
fonts = filter(self.font_is_renderable, fonts) # FIXME: this does not work
|
||||
|
||||
font = first(fonts)
|
||||
loguru.logger.debug(f"Using font: {font}")
|
||||
logger.debug(f"Using font: {font}")
|
||||
return font
|
||||
|
||||
def shuffle_fonts(self):
|
||||
@ -519,7 +597,7 @@ class RandomFontPicker:
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def load_font(self, font: str):
|
||||
loguru.logger.trace(f"Loading font: {font}")
|
||||
logger.trace(f"Loading font: {font}")
|
||||
try:
|
||||
return ImageFont.truetype(font, size=11)
|
||||
except OSError:
|
||||
@ -666,9 +744,9 @@ def maybe():
|
||||
return random.random() > 0.9
|
||||
|
||||
|
||||
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    """Build a RandomTextBlock covering `rectangle` and fill it with random text.

    Args:
        rectangle: Rectangle whose coordinates define the block.
        n_sentences: Passed on to `RandomTextBlock.generate_random_text`.

    Returns:
        The text block after `generate_random_text` has been called on it.
    """
    text_block = RandomTextBlock(*rectangle.coords)
    text_block.generate_random_text(rectangle, n_sentences)
    return text_block
|
||||
|
||||
|
||||
@ -697,10 +775,12 @@ class RandomTextBlock(ContentRectangle):
|
||||
def __call__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
    """Generate random text lines for `rectangle` and store their rendering as content.

    Args:
        rectangle: Passed to the line generator and to the image writer.
        n_sentences: Passed on to `generate_random_text_lines`.

    Returns:
        This instance, with `self.content` set to the rendered image.
    """
    text_lines = generate_random_text_lines(rectangle, self.format_lines, n_sentences)
    self.content = write_lines_to_image(text_lines, rectangle, self.font)
    # The rendered image is expected to be RGBA.
    assert self.content.mode == "RGBA"
    return self
|
||||
|
||||
def format_lines(self, lines, last_full):
|
||||
def truncate_current_line():
|
||||
@ -731,8 +811,8 @@ class RandomTextBlock(ContentRectangle):
|
||||
return line
|
||||
|
||||
|
||||
def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity) -> List[str]:
|
||||
text = Faker().paragraph(nb_sentences=3000, variable_nb_sentences=False, ext_word_list=None)
|
||||
def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
|
||||
text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
|
||||
unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)
|
||||
# each iteration of the line formatter function formats one more line and adds it to the back of the list
|
||||
formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user