From cee5e69a4b0e360849c328d1f2b1cde1a5cd8287 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Tue, 24 Jan 2023 13:06:59 +0100 Subject: [PATCH] Make page generation reproducible Tie all structural random events to a seeded random object. --- test/fixtures/page_generation/page.py | 80 ++++++++++++++------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index 6e322c7..1ecf61c 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -30,6 +30,10 @@ from cv_analysis.utils.merging import merge_related_rectangles from cv_analysis.utils.postprocessing import remove_overlapping, remove_included from cv_analysis.utils.spacial import area +random_seed = random.randint(0, 2**32 - 1) +rnd = random.Random(random_seed) +logger.info(f"Random seed: {random_seed}") + Image_t = Union[Image.Image, np.ndarray] # # transform = A.Compose( @@ -228,9 +232,9 @@ def color_intensity(request): def random_flip(image): - if random.choice([True, False]): + if rnd.choice([True, False]): image = image.transpose(Transpose.FLIP_LEFT_RIGHT) - if random.choice([True, False]): + if rnd.choice([True, False]): image = image.transpose(Transpose.FLIP_TOP_BOTTOM) return image @@ -380,10 +384,10 @@ def blank_page(texture, texture_fn) -> np.ndarray: """Creates a blank page with a given orientation and dpi.""" page = random_flip(texture) page = texture_fn(page) - page_partitioner = random.choice( + page_partitioner = rnd.choice( [ TwoColumnPagePartitioner(), - RandomPagePartitioner(), + # RandomPagePartitioner(), ] ) boxes = page_partitioner(page) @@ -410,7 +414,7 @@ class ContentGenerator: self.constrain_layouts = True def __call__(self, boxes: List[Rectangle]) -> Image: - random.shuffle(boxes) + rnd.shuffle(boxes) figure_boxes, text_boxes = lsplit(is_square_like, boxes) @@ -463,8 +467,8 @@ def generate_recursive_random_table_with_caption(rectangle: Rectangle): def 
split_into_figure_and_caption(rectangle: Rectangle): - gap_percentage = random.uniform(0, 0.03) - split_point = random.uniform(0.5, 0.9) + gap_percentage = rnd.uniform(0, 0.03) + split_point = rnd.uniform(0.5, 0.9) figure_box = Rectangle( rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2) ) @@ -502,7 +506,7 @@ def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRe @lru_cache(maxsize=None) def get_random_seed(): - return random.randint(0, 2**32 - 1) + return rnd.randint(0, 2**32 - 1) class RandomContentRectangle(ContentRectangle): @@ -517,7 +521,7 @@ class Size(Enum): # MEDIUM = sqrt((100 * 3) ** 2) # LARGE = sqrt((100 * 10) ** 2) - SMALL = 100 + SMALL = 60 MEDIUM = 180 LARGE = 300 @@ -540,22 +544,22 @@ def get_size(rectangle: Rectangle): class RecursiveRandomTable(RandomContentRectangle): def __init__(self, x1, y1, x2, y2, seed=None, border_width=1): super().__init__(x1, y1, x2, y2, seed=seed) - self.n_columns = random.randint(1, max(self.width // 100, 1)) - self.n_rows = random.randint(1, max(self.height // random.randint(17, 100), 1)) + self.n_columns = rnd.randint(1, max(self.width // 100, 1)) + self.n_rows = rnd.randint(1, max(self.height // rnd.randint(17, 100), 1)) self.cell_size = (self.width // self.n_columns, self.height // self.n_rows) self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0)) - self.background_color = tuple([random.randint(0, 100) for _ in range(4)]) + self.background_color = tuple([rnd.randint(0, 100) for _ in range(4)]) self.cell_border_color = (*map(lambda x: int(x * 0.8), self.background_color[:3]), 255) - self.layout = random.choice(["closed", "horizontal", "vertical", "open"]) + self.layout = rnd.choice(["closed", "horizontal", "vertical", "open"]) # Overwrite the layout choice in some cases if self.n_columns == 1 and self.n_rows == 1: self.layout = "closed" elif self.n_columns == 1: - self.layout = random.choice(["vertical", 
"closed"]) + self.layout = rnd.choice(["vertical", "closed"]) elif self.n_rows == 1: - self.layout = random.choice(["horizontal", "closed"]) + self.layout = rnd.choice(["horizontal", "closed"]) logger.debug(f"Layout: {self.layout}") # self.draw_single_cell_borders(self, border_width, fill=(0, 0, 0, 0)) @@ -573,7 +577,7 @@ class RecursiveRandomTable(RandomContentRectangle): inner_region = shrink_rectangle(cell, 0.4) - choice = random.choice(["text", "plot", "recurse", "plain_table", "blank"]) + choice = rnd.choice(["text", "plot", "recurse", "plain_table", "blank"]) size = get_size(inner_region) if size <= Size.SMALL.value: @@ -582,7 +586,7 @@ class RecursiveRandomTable(RandomContentRectangle): elif size <= Size.MEDIUM.value: - choice = random.choice(["plot", "recurse"]) + choice = rnd.choice(["plot", "recurse"]) # if choice == "plain_table": # return generate_random_table(cell) @@ -597,7 +601,7 @@ class RecursiveRandomTable(RandomContentRectangle): elif size <= Size.LARGE.value: - choice = random.choice(["plot", "recurse"]) + choice = rnd.choice(["plot", "recurse"]) logger.debug(f"Generating {choice} {size:.0f} {get_size_class(cell).name}") if choice == "plot" and is_square_like(cell): @@ -669,7 +673,7 @@ class RecursiveRandomTable(RandomContentRectangle): def generate_random_words(n_min, n_max): - column_name = Faker().words(random.randint(n_min, n_max)) + column_name = Faker().words(rnd.randint(n_min, n_max)) return column_name @@ -749,7 +753,7 @@ class RandomTable(RandomContentRectangle): def generate_random_ascii_table(self, rectangle: Rectangle): df = self.generate_random_dataframe(rectangle) - table_format = random.choice( + table_format = rnd.choice( [ # "simple", "grid", @@ -818,7 +822,7 @@ class RandomFontPicker: def shuffle_fonts(self): l = lzip(self.fonts, self.fonts_lower) - random.shuffle(l) + rnd.shuffle(l) self.fonts, self.fonts_lower = lzip(*l) def pick_random_mono_space_font_available_on_system(self) -> ImageFont: @@ -889,7 +893,7 @@ class 
RandomPlot(RandomContentRectangle): def generate_random_plot(self, rectangle: Rectangle): # noinspection PyArgumentList - random.choice( + rnd.choice( [ self.generate_random_line_plot, self.generate_random_bar_plot, @@ -905,7 +909,7 @@ class RandomPlot(RandomContentRectangle): self.__generate_random_plot(plt.bar, rectangle, x, y) def generate_random_line_plot(self, rectangle: Rectangle): - f = random.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square]) + f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square]) x = np.linspace(0, 10, 100) y = f(x) @@ -928,9 +932,9 @@ class RandomPlot(RandomContentRectangle): def generate_plot_kwargs(self, keywords=None): kwargs = { - "color": random.choice(self.cmap.colors), - "linestyle": random.choice(["-", "--", "-.", ":"]), - "linewidth": random.uniform(0.5, 2), + "color": rnd.choice(self.cmap.colors), + "linestyle": rnd.choice(["-", "--", "-.", ":"]), + "linewidth": rnd.uniform(0.5, 2), } return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords} @@ -967,7 +971,7 @@ class RandomPlot(RandomContentRectangle): def maybe(): - return random.random() > 0.9 + return rnd.random() > 0.9 def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle: @@ -985,11 +989,11 @@ def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> Conten def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle: - return generate_random_caption(rectangle, f"Fig {random.randint(1, 20)}") + return generate_random_caption(rectangle, f"Fig {rnd.randint(1, 20)}") def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle: - return generate_random_caption(rectangle, f"Tabl {random.randint(1, 20)}") + return generate_random_caption(rectangle, f"Tabl {rnd.randint(1, 20)}") def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle: @@ -1051,14 +1055,14 @@ class 
IdentityLineFormatter(LineFormatter): class ParagraphLineFormatter(LineFormatter): def __init__(self, blank_line_percentage=None): - self.blank_line_percentage = blank_line_percentage or random.uniform(0, 0.5) + self.blank_line_percentage = blank_line_percentage or rnd.uniform(0, 0.5) def __call__(self, lines, last_full): return self.format_lines(lines, last_full) def format_lines(self, lines, last_full): def truncate_current_line(): - return random.random() < self.blank_line_percentage and last_full + return rnd.random() < self.blank_line_percentage and last_full # This is meant to be read from the bottom up. current_line_shall_not_be_a_full_line = truncate_current_line() @@ -1079,7 +1083,7 @@ class ParagraphLineFormatter(LineFormatter): return line, full def truncate_line(self, line: str): - n_trailing_words = random.randint(0, 4) + n_trailing_words = rnd.randint(0, 4) line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "") line = line + ".\n" if line else line return line @@ -1091,7 +1095,7 @@ class TextBlockGenerator(abc.ABC): class ParagraphGenerator(TextBlockGenerator): def __init__(self): - self.line_formatter = ParagraphLineFormatter(blank_line_percentage=random.uniform(0, 0.5)) + self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5)) def __call__(self, rectangle, n_sentences): return self.generate_paragraph(rectangle, n_sentences) @@ -1104,7 +1108,7 @@ class ParagraphGenerator(TextBlockGenerator): class CaptionGenerator(TextBlockGenerator): def __init__(self, caption_start=None): self.line_formatter = IdentityLineFormatter() - self.caption_start = caption_start or f"Fig {random.randint(1, 20)}" + self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}" def __call__(self, rectangle, n_sentences): return self.generate_paragraph(rectangle, n_sentences) @@ -1216,7 +1220,7 @@ class PagePartitioner(abc.ABC): return child_boxes def recurse(self, depth): - return random.random() <= 
self.recursion_probability(depth) + return rnd.random() <= self.recursion_probability(depth) def recursion_probability(self, depth): return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth @@ -1232,8 +1236,8 @@ class RandomPagePartitioner(PagePartitioner): else: child_boxes = self.generate_child_boxes( box, - axis=random.choice(["x", "y"]), - split_percentage=random.uniform(0.3, 0.7), + axis=rnd.choice(["x", "y"]), + split_percentage=rnd.uniform(0.3, 0.7), ) if self.recurse(depth): yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes) @@ -1259,7 +1263,7 @@ class TwoColumnPagePartitioner(PagePartitioner): split_percentage = 0.5 else: axis = "y" - split_percentage = random.choice([0.3, 0.7]) + split_percentage = rnd.choice([0.3, 0.7]) child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)