Make page generation reproducible
Tie all structural random events to a seeded random object.
This commit is contained in:
parent
e715c86f8d
commit
cee5e69a4b
80
test/fixtures/page_generation/page.py
vendored
80
test/fixtures/page_generation/page.py
vendored
@ -30,6 +30,10 @@ from cv_analysis.utils.merging import merge_related_rectangles
|
||||
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
||||
from cv_analysis.utils.spacial import area
|
||||
|
||||
random_seed = random.randint(0, 2**32 - 1)
|
||||
rnd = random.Random(random_seed)
|
||||
logger.info(f"Random seed: {random_seed}")
|
||||
|
||||
Image_t = Union[Image.Image, np.ndarray]
|
||||
#
|
||||
# transform = A.Compose(
|
||||
@ -228,9 +232,9 @@ def color_intensity(request):
|
||||
|
||||
|
||||
def random_flip(image):
|
||||
if random.choice([True, False]):
|
||||
if rnd.choice([True, False]):
|
||||
image = image.transpose(Transpose.FLIP_LEFT_RIGHT)
|
||||
if random.choice([True, False]):
|
||||
if rnd.choice([True, False]):
|
||||
image = image.transpose(Transpose.FLIP_TOP_BOTTOM)
|
||||
return image
|
||||
|
||||
@ -380,10 +384,10 @@ def blank_page(texture, texture_fn) -> np.ndarray:
|
||||
"""Creates a blank page with a given orientation and dpi."""
|
||||
page = random_flip(texture)
|
||||
page = texture_fn(page)
|
||||
page_partitioner = random.choice(
|
||||
page_partitioner = rnd.choice(
|
||||
[
|
||||
TwoColumnPagePartitioner(),
|
||||
RandomPagePartitioner(),
|
||||
# RandomPagePartitioner(),
|
||||
]
|
||||
)
|
||||
boxes = page_partitioner(page)
|
||||
@ -410,7 +414,7 @@ class ContentGenerator:
|
||||
self.constrain_layouts = True
|
||||
|
||||
def __call__(self, boxes: List[Rectangle]) -> Image:
|
||||
random.shuffle(boxes)
|
||||
rnd.shuffle(boxes)
|
||||
|
||||
figure_boxes, text_boxes = lsplit(is_square_like, boxes)
|
||||
|
||||
@ -463,8 +467,8 @@ def generate_recursive_random_table_with_caption(rectangle: Rectangle):
|
||||
|
||||
|
||||
def split_into_figure_and_caption(rectangle: Rectangle):
|
||||
gap_percentage = random.uniform(0, 0.03)
|
||||
split_point = random.uniform(0.5, 0.9)
|
||||
gap_percentage = rnd.uniform(0, 0.03)
|
||||
split_point = rnd.uniform(0.5, 0.9)
|
||||
figure_box = Rectangle(
|
||||
rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
|
||||
)
|
||||
@ -502,7 +506,7 @@ def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRe
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_random_seed():
|
||||
return random.randint(0, 2**32 - 1)
|
||||
return rnd.randint(0, 2**32 - 1)
|
||||
|
||||
|
||||
class RandomContentRectangle(ContentRectangle):
|
||||
@ -517,7 +521,7 @@ class Size(Enum):
|
||||
# MEDIUM = sqrt((100 * 3) ** 2)
|
||||
# LARGE = sqrt((100 * 10) ** 2)
|
||||
|
||||
SMALL = 100
|
||||
SMALL = 60
|
||||
MEDIUM = 180
|
||||
LARGE = 300
|
||||
|
||||
@ -540,22 +544,22 @@ def get_size(rectangle: Rectangle):
|
||||
class RecursiveRandomTable(RandomContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, seed=None, border_width=1):
|
||||
super().__init__(x1, y1, x2, y2, seed=seed)
|
||||
self.n_columns = random.randint(1, max(self.width // 100, 1))
|
||||
self.n_rows = random.randint(1, max(self.height // random.randint(17, 100), 1))
|
||||
self.n_columns = rnd.randint(1, max(self.width // 100, 1))
|
||||
self.n_rows = rnd.randint(1, max(self.height // rnd.randint(17, 100), 1))
|
||||
self.cell_size = (self.width // self.n_columns, self.height // self.n_rows)
|
||||
|
||||
self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))
|
||||
self.background_color = tuple([random.randint(0, 100) for _ in range(4)])
|
||||
self.background_color = tuple([rnd.randint(0, 100) for _ in range(4)])
|
||||
self.cell_border_color = (*map(lambda x: int(x * 0.8), self.background_color[:3]), 255)
|
||||
self.layout = random.choice(["closed", "horizontal", "vertical", "open"])
|
||||
self.layout = rnd.choice(["closed", "horizontal", "vertical", "open"])
|
||||
|
||||
# Overwrite the layout choice in some cases
|
||||
if self.n_columns == 1 and self.n_rows == 1:
|
||||
self.layout = "closed"
|
||||
elif self.n_columns == 1:
|
||||
self.layout = random.choice(["vertical", "closed"])
|
||||
self.layout = rnd.choice(["vertical", "closed"])
|
||||
elif self.n_rows == 1:
|
||||
self.layout = random.choice(["horizontal", "closed"])
|
||||
self.layout = rnd.choice(["horizontal", "closed"])
|
||||
|
||||
logger.debug(f"Layout: {self.layout}")
|
||||
# self.draw_single_cell_borders(self, border_width, fill=(0, 0, 0, 0))
|
||||
@ -573,7 +577,7 @@ class RecursiveRandomTable(RandomContentRectangle):
|
||||
|
||||
inner_region = shrink_rectangle(cell, 0.4)
|
||||
|
||||
choice = random.choice(["text", "plot", "recurse", "plain_table", "blank"])
|
||||
choice = rnd.choice(["text", "plot", "recurse", "plain_table", "blank"])
|
||||
size = get_size(inner_region)
|
||||
|
||||
if size <= Size.SMALL.value:
|
||||
@ -582,7 +586,7 @@ class RecursiveRandomTable(RandomContentRectangle):
|
||||
|
||||
elif size <= Size.MEDIUM.value:
|
||||
|
||||
choice = random.choice(["plot", "recurse"])
|
||||
choice = rnd.choice(["plot", "recurse"])
|
||||
|
||||
# if choice == "plain_table":
|
||||
# return generate_random_table(cell)
|
||||
@ -597,7 +601,7 @@ class RecursiveRandomTable(RandomContentRectangle):
|
||||
|
||||
elif size <= Size.LARGE.value:
|
||||
|
||||
choice = random.choice(["plot", "recurse"])
|
||||
choice = rnd.choice(["plot", "recurse"])
|
||||
|
||||
logger.debug(f"Generating {choice} {size:.0f} {get_size_class(cell).name}")
|
||||
if choice == "plot" and is_square_like(cell):
|
||||
@ -669,7 +673,7 @@ class RecursiveRandomTable(RandomContentRectangle):
|
||||
|
||||
|
||||
def generate_random_words(n_min, n_max):
|
||||
column_name = Faker().words(random.randint(n_min, n_max))
|
||||
column_name = Faker().words(rnd.randint(n_min, n_max))
|
||||
return column_name
|
||||
|
||||
|
||||
@ -749,7 +753,7 @@ class RandomTable(RandomContentRectangle):
|
||||
|
||||
def generate_random_ascii_table(self, rectangle: Rectangle):
|
||||
df = self.generate_random_dataframe(rectangle)
|
||||
table_format = random.choice(
|
||||
table_format = rnd.choice(
|
||||
[
|
||||
# "simple",
|
||||
"grid",
|
||||
@ -818,7 +822,7 @@ class RandomFontPicker:
|
||||
|
||||
def shuffle_fonts(self):
|
||||
l = lzip(self.fonts, self.fonts_lower)
|
||||
random.shuffle(l)
|
||||
rnd.shuffle(l)
|
||||
self.fonts, self.fonts_lower = lzip(*l)
|
||||
|
||||
def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
|
||||
@ -889,7 +893,7 @@ class RandomPlot(RandomContentRectangle):
|
||||
|
||||
def generate_random_plot(self, rectangle: Rectangle):
|
||||
# noinspection PyArgumentList
|
||||
random.choice(
|
||||
rnd.choice(
|
||||
[
|
||||
self.generate_random_line_plot,
|
||||
self.generate_random_bar_plot,
|
||||
@ -905,7 +909,7 @@ class RandomPlot(RandomContentRectangle):
|
||||
self.__generate_random_plot(plt.bar, rectangle, x, y)
|
||||
|
||||
def generate_random_line_plot(self, rectangle: Rectangle):
|
||||
f = random.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])
|
||||
f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])
|
||||
|
||||
x = np.linspace(0, 10, 100)
|
||||
y = f(x)
|
||||
@ -928,9 +932,9 @@ class RandomPlot(RandomContentRectangle):
|
||||
def generate_plot_kwargs(self, keywords=None):
|
||||
|
||||
kwargs = {
|
||||
"color": random.choice(self.cmap.colors),
|
||||
"linestyle": random.choice(["-", "--", "-.", ":"]),
|
||||
"linewidth": random.uniform(0.5, 2),
|
||||
"color": rnd.choice(self.cmap.colors),
|
||||
"linestyle": rnd.choice(["-", "--", "-.", ":"]),
|
||||
"linewidth": rnd.uniform(0.5, 2),
|
||||
}
|
||||
|
||||
return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords}
|
||||
@ -967,7 +971,7 @@ class RandomPlot(RandomContentRectangle):
|
||||
|
||||
|
||||
def maybe():
|
||||
return random.random() > 0.9
|
||||
return rnd.random() > 0.9
|
||||
|
||||
|
||||
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
|
||||
@ -985,11 +989,11 @@ def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> Conten
|
||||
|
||||
|
||||
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
|
||||
return generate_random_caption(rectangle, f"Fig {random.randint(1, 20)}")
|
||||
return generate_random_caption(rectangle, f"Fig {rnd.randint(1, 20)}")
|
||||
|
||||
|
||||
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
|
||||
return generate_random_caption(rectangle, f"Tabl {random.randint(1, 20)}")
|
||||
return generate_random_caption(rectangle, f"Tabl {rnd.randint(1, 20)}")
|
||||
|
||||
|
||||
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
|
||||
@ -1051,14 +1055,14 @@ class IdentityLineFormatter(LineFormatter):
|
||||
|
||||
class ParagraphLineFormatter(LineFormatter):
|
||||
def __init__(self, blank_line_percentage=None):
|
||||
self.blank_line_percentage = blank_line_percentage or random.uniform(0, 0.5)
|
||||
self.blank_line_percentage = blank_line_percentage or rnd.uniform(0, 0.5)
|
||||
|
||||
def __call__(self, lines, last_full):
|
||||
return self.format_lines(lines, last_full)
|
||||
|
||||
def format_lines(self, lines, last_full):
|
||||
def truncate_current_line():
|
||||
return random.random() < self.blank_line_percentage and last_full
|
||||
return rnd.random() < self.blank_line_percentage and last_full
|
||||
|
||||
# This is meant to be read from the bottom up.
|
||||
current_line_shall_not_be_a_full_line = truncate_current_line()
|
||||
@ -1079,7 +1083,7 @@ class ParagraphLineFormatter(LineFormatter):
|
||||
return line, full
|
||||
|
||||
def truncate_line(self, line: str):
|
||||
n_trailing_words = random.randint(0, 4)
|
||||
n_trailing_words = rnd.randint(0, 4)
|
||||
line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
|
||||
line = line + ".\n" if line else line
|
||||
return line
|
||||
@ -1091,7 +1095,7 @@ class TextBlockGenerator(abc.ABC):
|
||||
|
||||
class ParagraphGenerator(TextBlockGenerator):
|
||||
def __init__(self):
|
||||
self.line_formatter = ParagraphLineFormatter(blank_line_percentage=random.uniform(0, 0.5))
|
||||
self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5))
|
||||
|
||||
def __call__(self, rectangle, n_sentences):
|
||||
return self.generate_paragraph(rectangle, n_sentences)
|
||||
@ -1104,7 +1108,7 @@ class ParagraphGenerator(TextBlockGenerator):
|
||||
class CaptionGenerator(TextBlockGenerator):
|
||||
def __init__(self, caption_start=None):
|
||||
self.line_formatter = IdentityLineFormatter()
|
||||
self.caption_start = caption_start or f"Fig {random.randint(1, 20)}"
|
||||
self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"
|
||||
|
||||
def __call__(self, rectangle, n_sentences):
|
||||
return self.generate_paragraph(rectangle, n_sentences)
|
||||
@ -1216,7 +1220,7 @@ class PagePartitioner(abc.ABC):
|
||||
return child_boxes
|
||||
|
||||
def recurse(self, depth):
|
||||
return random.random() <= self.recursion_probability(depth)
|
||||
return rnd.random() <= self.recursion_probability(depth)
|
||||
|
||||
def recursion_probability(self, depth):
|
||||
return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth
|
||||
@ -1232,8 +1236,8 @@ class RandomPagePartitioner(PagePartitioner):
|
||||
else:
|
||||
child_boxes = self.generate_child_boxes(
|
||||
box,
|
||||
axis=random.choice(["x", "y"]),
|
||||
split_percentage=random.uniform(0.3, 0.7),
|
||||
axis=rnd.choice(["x", "y"]),
|
||||
split_percentage=rnd.uniform(0.3, 0.7),
|
||||
)
|
||||
if self.recurse(depth):
|
||||
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
|
||||
@ -1259,7 +1263,7 @@ class TwoColumnPagePartitioner(PagePartitioner):
|
||||
split_percentage = 0.5
|
||||
else:
|
||||
axis = "y"
|
||||
split_percentage = random.choice([0.3, 0.7])
|
||||
split_percentage = rnd.choice([0.3, 0.7])
|
||||
|
||||
child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user