# Patch summary (free-form preamble; everything before the first "diff --git" header is commentary).
#
# Purpose: relocate the random segment generators from the test fixture module
# test/fixtures/page_generation/page.py into synthesis/segment/segments.py.
#
# synthesis/segment/segments.py
#   - adds imports: `probably`, `rnd` (synthesis.random) and `CaptionGenerator`
#     (synthesis.text.text_block_generator.caption)
#   - appends after generate_text_block: generate_random_plot_with_caption,
#     generate_recursive_random_table_with_caption, split_into_figure_and_caption,
#     generate_random_text_block, generate_random_image_caption,
#     generate_random_table_caption, generate_random_caption
#
# test/fixtures/page_generation/page.py
#   - removes those same seven definitions; the removed bodies shown here match the added ones
#   - drops the imports of `probably`, ContentRectangle, TextBlock,
#     pick_random_font_available_on_system, CaptionGenerator, generate_random_plot and
#     generate_recursive_random_table (presumably unused after the move; the rest of page.py is
#     not visible in this patch, so confirm)
#   - imports generate_random_plot_with_caption, generate_recursive_random_table_with_caption and
#     generate_random_text_block from synthesis.segment.segments instead
#
# NOTE(review): the context line "# TODO: deduplicate with generate_random_table_with_caption"
#   is kept in page.py although the function directly below it is removed, so the TODO is left
#   without a target there; it probably should move along with the function. Confirm.
# NOTE(review): the table caption prefix is the literal "Tabl"; it looks like a truncated
#   "Table"/"Tab." and is carried over unchanged by this move. Confirm it is intentional.
# NOTE(review): both font_size arguments (30 and 100) carry a TODO saying the value seems to have
#   no effect; the move does not address that.
diff --git a/synthesis/segment/segments.py b/synthesis/segment/segments.py index be069e3..adfb078 100644 --- a/synthesis/segment/segments.py +++ b/synthesis/segment/segments.py @@ -1,8 +1,10 @@ from cv_analysis.utils.rectangle import Rectangle +from synthesis.random import probably, rnd from synthesis.segment.content_rectangle import ContentRectangle from synthesis.segment.plot import RandomPlot from synthesis.segment.text_block import TextBlock from synthesis.text.font import pick_random_font_available_on_system +from synthesis.text.text_block_generator.caption import CaptionGenerator def generate_random_plot(rectangle: Rectangle) -> ContentRectangle: @@ -34,3 +36,66 @@ def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle: block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor block.put_text(text, rectangle) return block + + +def generate_random_plot_with_caption(rectangle: Rectangle): + plot_box, caption_box = split_into_figure_and_caption(rectangle) + plot_box = generate_random_plot(plot_box) + caption_box = generate_random_image_caption(caption_box) + return plot_box, caption_box + + +def generate_recursive_random_table_with_caption(rectangle: Rectangle): + table_box, caption_box = split_into_figure_and_caption(rectangle) + table_box = generate_recursive_random_table(table_box, double_rule=probably()) + caption_box = generate_random_table_caption(caption_box) + return table_box, caption_box + + +def split_into_figure_and_caption(rectangle: Rectangle): + gap_percentage = rnd.uniform(0, 0.03) + split_point = rnd.uniform(0.5, 0.9) + figure_box = Rectangle( + rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2) + ) + caption_box = Rectangle( + rectangle.x1, rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2), rectangle.x2, rectangle.y2 + ) + return figure_box, caption_box + + +def generate_random_text_block(rectangle: 
Rectangle, n_sentences=3000) -> ContentRectangle: + block = TextBlock( + *rectangle.coords, + font=pick_random_font_available_on_system( + includes=("serif", "sans-serif"), + excludes=("bold", "mono", "italic", "oblique", "cursive"), + ), + font_size=30, # TODO: De-hardcode font size... Seems to have no effect on top of that + ) + block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor + block.generate_random_text(rectangle, n_sentences) + return block + + +def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle: + return generate_random_caption(rectangle, f"Fig {rnd.randint(1, 20)}") + + +def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle: + return generate_random_caption(rectangle, f"Tabl {rnd.randint(1, 20)}") + + +def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle: + block = TextBlock( + *rectangle.coords, + text_generator=CaptionGenerator(caption_start=caption_start), + font=pick_random_font_available_on_system( + includes=("italic",), + excludes=("bold", "mono"), + ), + font_size=100, # TODO: De-hardcode font size... 
Seems to have no effect on top of that + ) + block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor + block.generate_random_text(rectangle, n_sentences) + return block diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index 8c2a244..c106172 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -16,13 +16,13 @@ from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpo from cv_analysis.utils.merging import merge_related_rectangles from cv_analysis.utils.postprocessing import remove_overlapping, remove_included from synthesis.partitioner.two_column import TwoColumnPagePartitioner -from synthesis.random import rnd, probably -from synthesis.segment.content_rectangle import ContentRectangle -from synthesis.segment.segments import generate_random_plot, generate_recursive_random_table +from synthesis.random import rnd +from synthesis.segment.segments import ( + generate_random_plot_with_caption, + generate_recursive_random_table_with_caption, + generate_random_text_block, +) from synthesis.segment.table.table import paste_contents -from synthesis.segment.text_block import TextBlock -from synthesis.text.font import pick_random_font_available_on_system -from synthesis.text.text_block_generator.caption import CaptionGenerator logger.remove() logger.add(sys.stderr, level="INFO") @@ -216,68 +216,7 @@ class ContentGenerator: return boxes -def generate_random_plot_with_caption(rectangle: Rectangle): - plot_box, caption_box = split_into_figure_and_caption(rectangle) - plot_box = generate_random_plot(plot_box) - caption_box = generate_random_image_caption(caption_box) - return plot_box, caption_box - - # TODO: deduplicate with generate_random_table_with_caption -def generate_recursive_random_table_with_caption(rectangle: Rectangle): - table_box, caption_box = split_into_figure_and_caption(rectangle) - table_box = 
generate_recursive_random_table(table_box, double_rule=probably()) - caption_box = generate_random_table_caption(caption_box) - return table_box, caption_box - - -def split_into_figure_and_caption(rectangle: Rectangle): - gap_percentage = rnd.uniform(0, 0.03) - split_point = rnd.uniform(0.5, 0.9) - figure_box = Rectangle( - rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2) - ) - caption_box = Rectangle( - rectangle.x1, rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2), rectangle.x2, rectangle.y2 - ) - return figure_box, caption_box - - -def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle: - block = TextBlock( - *rectangle.coords, - font=pick_random_font_available_on_system( - includes=("serif", "sans-serif"), - excludes=("bold", "mono", "italic", "oblique", "cursive"), - ), - font_size=30, # TODO: De-hardcode font size... Seems to have no effect on top of that - ) - block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor - block.generate_random_text(rectangle, n_sentences) - return block - - -def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle: - return generate_random_caption(rectangle, f"Fig {rnd.randint(1, 20)}") - - -def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle: - return generate_random_caption(rectangle, f"Tabl {rnd.randint(1, 20)}") - - -def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle: - block = TextBlock( - *rectangle.coords, - text_generator=CaptionGenerator(caption_start=caption_start), - font=pick_random_font_available_on_system( - includes=("italic",), - excludes=("bold", "mono"), - ), - font_size=100, # TODO: De-hardcode font size... 
Seems to have no effect on top of that - ) - block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor - block.generate_random_text(rectangle, n_sentences) - return block @pytest.fixture(