Make page generation reproducible

Tie all structural random events to a seeded random object.
This commit is contained in:
Matthias Bisping 2023-01-24 13:06:59 +01:00
parent e715c86f8d
commit cee5e69a4b

View File

@ -30,6 +30,10 @@ from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
from cv_analysis.utils.spacial import area
random_seed = random.randint(0, 2**32 - 1)
rnd = random.Random(random_seed)
logger.info(f"Random seed: {random_seed}")
Image_t = Union[Image.Image, np.ndarray]
#
# transform = A.Compose(
@ -228,9 +232,9 @@ def color_intensity(request):
def random_flip(image):
if random.choice([True, False]):
if rnd.choice([True, False]):
image = image.transpose(Transpose.FLIP_LEFT_RIGHT)
if random.choice([True, False]):
if rnd.choice([True, False]):
image = image.transpose(Transpose.FLIP_TOP_BOTTOM)
return image
@ -380,10 +384,10 @@ def blank_page(texture, texture_fn) -> np.ndarray:
"""Creates a blank page with a given orientation and dpi."""
page = random_flip(texture)
page = texture_fn(page)
page_partitioner = random.choice(
page_partitioner = rnd.choice(
[
TwoColumnPagePartitioner(),
RandomPagePartitioner(),
# RandomPagePartitioner(),
]
)
boxes = page_partitioner(page)
@ -410,7 +414,7 @@ class ContentGenerator:
self.constrain_layouts = True
def __call__(self, boxes: List[Rectangle]) -> Image:
random.shuffle(boxes)
rnd.shuffle(boxes)
figure_boxes, text_boxes = lsplit(is_square_like, boxes)
@ -463,8 +467,8 @@ def generate_recursive_random_table_with_caption(rectangle: Rectangle):
def split_into_figure_and_caption(rectangle: Rectangle):
gap_percentage = random.uniform(0, 0.03)
split_point = random.uniform(0.5, 0.9)
gap_percentage = rnd.uniform(0, 0.03)
split_point = rnd.uniform(0.5, 0.9)
figure_box = Rectangle(
rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
)
@ -502,7 +506,7 @@ def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRe
@lru_cache(maxsize=None)
def get_random_seed():
return random.randint(0, 2**32 - 1)
return rnd.randint(0, 2**32 - 1)
class RandomContentRectangle(ContentRectangle):
@ -517,7 +521,7 @@ class Size(Enum):
# MEDIUM = sqrt((100 * 3) ** 2)
# LARGE = sqrt((100 * 10) ** 2)
SMALL = 100
SMALL = 60
MEDIUM = 180
LARGE = 300
@ -540,22 +544,22 @@ def get_size(rectangle: Rectangle):
class RecursiveRandomTable(RandomContentRectangle):
def __init__(self, x1, y1, x2, y2, seed=None, border_width=1):
super().__init__(x1, y1, x2, y2, seed=seed)
self.n_columns = random.randint(1, max(self.width // 100, 1))
self.n_rows = random.randint(1, max(self.height // random.randint(17, 100), 1))
self.n_columns = rnd.randint(1, max(self.width // 100, 1))
self.n_rows = rnd.randint(1, max(self.height // rnd.randint(17, 100), 1))
self.cell_size = (self.width // self.n_columns, self.height // self.n_rows)
self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))
self.background_color = tuple([random.randint(0, 100) for _ in range(4)])
self.background_color = tuple([rnd.randint(0, 100) for _ in range(4)])
self.cell_border_color = (*map(lambda x: int(x * 0.8), self.background_color[:3]), 255)
self.layout = random.choice(["closed", "horizontal", "vertical", "open"])
self.layout = rnd.choice(["closed", "horizontal", "vertical", "open"])
# Overwrite the layout choice in some cases
if self.n_columns == 1 and self.n_rows == 1:
self.layout = "closed"
elif self.n_columns == 1:
self.layout = random.choice(["vertical", "closed"])
self.layout = rnd.choice(["vertical", "closed"])
elif self.n_rows == 1:
self.layout = random.choice(["horizontal", "closed"])
self.layout = rnd.choice(["horizontal", "closed"])
logger.debug(f"Layout: {self.layout}")
# self.draw_single_cell_borders(self, border_width, fill=(0, 0, 0, 0))
@ -573,7 +577,7 @@ class RecursiveRandomTable(RandomContentRectangle):
inner_region = shrink_rectangle(cell, 0.4)
choice = random.choice(["text", "plot", "recurse", "plain_table", "blank"])
choice = rnd.choice(["text", "plot", "recurse", "plain_table", "blank"])
size = get_size(inner_region)
if size <= Size.SMALL.value:
@ -582,7 +586,7 @@ class RecursiveRandomTable(RandomContentRectangle):
elif size <= Size.MEDIUM.value:
choice = random.choice(["plot", "recurse"])
choice = rnd.choice(["plot", "recurse"])
# if choice == "plain_table":
# return generate_random_table(cell)
@ -597,7 +601,7 @@ class RecursiveRandomTable(RandomContentRectangle):
elif size <= Size.LARGE.value:
choice = random.choice(["plot", "recurse"])
choice = rnd.choice(["plot", "recurse"])
logger.debug(f"Generating {choice} {size:.0f} {get_size_class(cell).name}")
if choice == "plot" and is_square_like(cell):
@ -669,7 +673,7 @@ class RecursiveRandomTable(RandomContentRectangle):
def generate_random_words(n_min, n_max):
column_name = Faker().words(random.randint(n_min, n_max))
column_name = Faker().words(rnd.randint(n_min, n_max))
return column_name
@ -749,7 +753,7 @@ class RandomTable(RandomContentRectangle):
def generate_random_ascii_table(self, rectangle: Rectangle):
df = self.generate_random_dataframe(rectangle)
table_format = random.choice(
table_format = rnd.choice(
[
# "simple",
"grid",
@ -818,7 +822,7 @@ class RandomFontPicker:
def shuffle_fonts(self):
l = lzip(self.fonts, self.fonts_lower)
random.shuffle(l)
rnd.shuffle(l)
self.fonts, self.fonts_lower = lzip(*l)
def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
@ -889,7 +893,7 @@ class RandomPlot(RandomContentRectangle):
def generate_random_plot(self, rectangle: Rectangle):
# noinspection PyArgumentList
random.choice(
rnd.choice(
[
self.generate_random_line_plot,
self.generate_random_bar_plot,
@ -905,7 +909,7 @@ class RandomPlot(RandomContentRectangle):
self.__generate_random_plot(plt.bar, rectangle, x, y)
def generate_random_line_plot(self, rectangle: Rectangle):
f = random.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])
f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])
x = np.linspace(0, 10, 100)
y = f(x)
@ -928,9 +932,9 @@ class RandomPlot(RandomContentRectangle):
def generate_plot_kwargs(self, keywords=None):
kwargs = {
"color": random.choice(self.cmap.colors),
"linestyle": random.choice(["-", "--", "-.", ":"]),
"linewidth": random.uniform(0.5, 2),
"color": rnd.choice(self.cmap.colors),
"linestyle": rnd.choice(["-", "--", "-.", ":"]),
"linewidth": rnd.uniform(0.5, 2),
}
return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords}
@ -967,7 +971,7 @@ class RandomPlot(RandomContentRectangle):
def maybe():
return random.random() > 0.9
return rnd.random() > 0.9
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
@ -985,11 +989,11 @@ def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> Conten
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
return generate_random_caption(rectangle, f"Fig {random.randint(1, 20)}")
return generate_random_caption(rectangle, f"Fig {rnd.randint(1, 20)}")
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
return generate_random_caption(rectangle, f"Tabl {random.randint(1, 20)}")
return generate_random_caption(rectangle, f"Tabl {rnd.randint(1, 20)}")
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
@ -1051,14 +1055,14 @@ class IdentityLineFormatter(LineFormatter):
class ParagraphLineFormatter(LineFormatter):
def __init__(self, blank_line_percentage=None):
self.blank_line_percentage = blank_line_percentage or random.uniform(0, 0.5)
self.blank_line_percentage = blank_line_percentage or rnd.uniform(0, 0.5)
def __call__(self, lines, last_full):
return self.format_lines(lines, last_full)
def format_lines(self, lines, last_full):
def truncate_current_line():
return random.random() < self.blank_line_percentage and last_full
return rnd.random() < self.blank_line_percentage and last_full
# This is meant to be read from the bottom up.
current_line_shall_not_be_a_full_line = truncate_current_line()
@ -1079,7 +1083,7 @@ class ParagraphLineFormatter(LineFormatter):
return line, full
def truncate_line(self, line: str):
n_trailing_words = random.randint(0, 4)
n_trailing_words = rnd.randint(0, 4)
line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
line = line + ".\n" if line else line
return line
@ -1091,7 +1095,7 @@ class TextBlockGenerator(abc.ABC):
class ParagraphGenerator(TextBlockGenerator):
def __init__(self):
self.line_formatter = ParagraphLineFormatter(blank_line_percentage=random.uniform(0, 0.5))
self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5))
def __call__(self, rectangle, n_sentences):
return self.generate_paragraph(rectangle, n_sentences)
@ -1104,7 +1108,7 @@ class ParagraphGenerator(TextBlockGenerator):
class CaptionGenerator(TextBlockGenerator):
def __init__(self, caption_start=None):
self.line_formatter = IdentityLineFormatter()
self.caption_start = caption_start or f"Fig {random.randint(1, 20)}"
self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"
def __call__(self, rectangle, n_sentences):
return self.generate_paragraph(rectangle, n_sentences)
@ -1216,7 +1220,7 @@ class PagePartitioner(abc.ABC):
return child_boxes
def recurse(self, depth):
return random.random() <= self.recursion_probability(depth)
return rnd.random() <= self.recursion_probability(depth)
def recursion_probability(self, depth):
return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth
@ -1232,8 +1236,8 @@ class RandomPagePartitioner(PagePartitioner):
else:
child_boxes = self.generate_child_boxes(
box,
axis=random.choice(["x", "y"]),
split_percentage=random.uniform(0.3, 0.7),
axis=rnd.choice(["x", "y"]),
split_percentage=rnd.uniform(0.3, 0.7),
)
if self.recurse(depth):
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
@ -1259,7 +1263,7 @@ class TwoColumnPagePartitioner(PagePartitioner):
split_percentage = 0.5
else:
axis = "y"
split_percentage = random.choice([0.3, 0.7])
split_percentage = rnd.choice([0.3, 0.7])
child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)