Refactoring: Move
Move remaining segment generation functions into segments module
This commit is contained in:
parent
d2cb78d38f
commit
ddc92461d7
@ -1,8 +1,10 @@
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.random import probably, rnd
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
from synthesis.segment.plot import RandomPlot
|
||||
from synthesis.segment.text_block import TextBlock
|
||||
from synthesis.text.font import pick_random_font_available_on_system
|
||||
from synthesis.text.text_block_generator.caption import CaptionGenerator
|
||||
|
||||
|
||||
def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
|
||||
@ -34,3 +36,66 @@ def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
|
||||
block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor
|
||||
block.put_text(text, rectangle)
|
||||
return block
|
||||
|
||||
|
||||
def generate_random_plot_with_caption(rectangle: Rectangle):
    """Populate `rectangle` with a random plot and an accompanying image caption.

    The rectangle is divided by `split_into_figure_and_caption`. The first part is
    passed to `generate_random_plot`, the second to `generate_random_image_caption`.

    Args:
        rectangle: Area to be divided into a plot region and a caption region.

    Returns:
        A `(plot, caption)` tuple holding the results of the two generator calls.
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    # Tuple elements are evaluated left to right, so the plot is generated before the caption.
    return generate_random_plot(figure_area), generate_random_image_caption(caption_area)
|
||||
|
||||
|
||||
def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    """Populate `rectangle` with a random table and an accompanying table caption.

    The rectangle is divided by `split_into_figure_and_caption`. The first part is
    passed to `generate_recursive_random_table` (with `double_rule` taken from
    `probably()`), the second to `generate_random_table_caption`.

    Args:
        rectangle: Area to be divided into a table region and a caption region.

    Returns:
        A `(table, caption)` tuple holding the results of the two generator calls.
    """
    table_area, caption_area = split_into_figure_and_caption(rectangle)
    use_double_rule = probably()
    table = generate_recursive_random_table(table_area, double_rule=use_double_rule)
    caption = generate_random_table_caption(caption_area)
    return table, caption
|
||||
|
||||
|
||||
def split_into_figure_and_caption(rectangle: Rectangle):
    """Divide `rectangle` along its height into a figure box and a caption box.

    A gap fraction is drawn with `rnd.uniform(0, 0.03)` and a split fraction with
    `rnd.uniform(0.5, 0.9)`, both relative to `rectangle.height`. The gap is centred
    on the split position and belongs to neither box.

    Args:
        rectangle: Area to divide; its `x1`, `y1`, `x2`, `y2` and `height` are read.

    Returns:
        A `(figure_box, caption_box)` tuple of `Rectangle`s. Both span `x1`..`x2`;
        the figure box starts at `y1`, the caption box ends at `y2`.
    """
    # Keep the draw order (gap, then split) so the sequence of random draws is unchanged.
    gap_fraction = rnd.uniform(0, 0.03)
    split_fraction = rnd.uniform(0.5, 0.9)
    half_gap = gap_fraction / 2

    figure_end = rectangle.y1 + rectangle.height * (split_fraction - half_gap)
    caption_start = rectangle.y1 + rectangle.height * (split_fraction + half_gap)

    figure_box = Rectangle(rectangle.x1, rectangle.y1, rectangle.x2, figure_end)
    caption_box = Rectangle(rectangle.x1, caption_start, rectangle.x2, rectangle.y2)
    return figure_box, caption_box
|
||||
|
||||
|
||||
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    """Build a `TextBlock` over `rectangle` and have it generate random text.

    The font is picked from the system fonts matching "serif" or "sans-serif" while
    excluding bold, mono, italic, oblique and cursive variants.

    Args:
        rectangle: Area the block covers; its `coords` are unpacked into `TextBlock`.
        n_sentences: Forwarded to `TextBlock.generate_random_text`.

    Returns:
        The populated `TextBlock`.
    """
    body_font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif"),
        excludes=("bold", "mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    text_block = TextBlock(*rectangle.coords, font=body_font, font_size=30)

    # TODO: Refactor
    # Carry over existing content only when the input rectangle already holds some.
    if isinstance(rectangle, ContentRectangle):
        text_block.content = rectangle.content
    else:
        text_block.content = None

    text_block.generate_random_text(rectangle, n_sentences)
    return text_block
|
||||
|
||||
|
||||
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    """Generate a caption for `rectangle` that starts with "Fig <number>".

    The number comes from `rnd.randint(1, 20)`; everything else is delegated to
    `generate_random_caption`.
    """
    figure_number = rnd.randint(1, 20)
    caption_prefix = f"Fig {figure_number}"
    return generate_random_caption(rectangle, caption_prefix)
|
||||
|
||||
|
||||
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    """Generate a caption for `rectangle` that starts with "Tabl <number>".

    The number comes from `rnd.randint(1, 20)`; everything else is delegated to
    `generate_random_caption`.
    """
    table_number = rnd.randint(1, 20)
    caption_prefix = f"Tabl {table_number}"
    return generate_random_caption(rectangle, caption_prefix)
|
||||
|
||||
|
||||
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    """Build a caption `TextBlock` over `rectangle` and have it generate random text.

    The block uses a `CaptionGenerator` seeded with `caption_start` and a font picked
    from the system fonts matching "italic" while excluding bold and mono variants.

    Args:
        rectangle: Area the caption covers; its `coords` are unpacked into `TextBlock`.
        caption_start: Passed to `CaptionGenerator` as `caption_start`.
        n_sentences: Forwarded to `TextBlock.generate_random_text`.

    Returns:
        The populated `TextBlock`.
    """
    caption_block = TextBlock(
        *rectangle.coords,
        text_generator=CaptionGenerator(caption_start=caption_start),
        font=pick_random_font_available_on_system(includes=("italic",), excludes=("bold", "mono")),
        font_size=100,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )

    # TODO: Refactor
    # Carry over existing content only when the input rectangle already holds some.
    existing_content = None
    if isinstance(rectangle, ContentRectangle):
        existing_content = rectangle.content
    caption_block.content = existing_content

    caption_block.generate_random_text(rectangle, n_sentences)
    return caption_block
|
||||
|
||||
73
test/fixtures/page_generation/page.py
vendored
73
test/fixtures/page_generation/page.py
vendored
@ -16,13 +16,13 @@ from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpo
|
||||
from cv_analysis.utils.merging import merge_related_rectangles
|
||||
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
||||
from synthesis.partitioner.two_column import TwoColumnPagePartitioner
|
||||
from synthesis.random import rnd, probably
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
from synthesis.segment.segments import generate_random_plot, generate_recursive_random_table
|
||||
from synthesis.random import rnd
|
||||
from synthesis.segment.segments import (
|
||||
generate_random_plot_with_caption,
|
||||
generate_recursive_random_table_with_caption,
|
||||
generate_random_text_block,
|
||||
)
|
||||
from synthesis.segment.table.table import paste_contents
|
||||
from synthesis.segment.text_block import TextBlock
|
||||
from synthesis.text.font import pick_random_font_available_on_system
|
||||
from synthesis.text.text_block_generator.caption import CaptionGenerator
|
||||
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="INFO")
|
||||
@ -216,68 +216,7 @@ class ContentGenerator:
|
||||
return boxes
|
||||
|
||||
|
||||
def generate_random_plot_with_caption(rectangle: Rectangle):
    """Populate `rectangle` with a random plot and an accompanying image caption.

    The rectangle is divided by `split_into_figure_and_caption`. The first part is
    passed to `generate_random_plot`, the second to `generate_random_image_caption`.

    Args:
        rectangle: Area to be divided into a plot region and a caption region.

    Returns:
        A `(plot, caption)` tuple holding the results of the two generator calls.
    """
    figure_area, caption_area = split_into_figure_and_caption(rectangle)
    # Tuple elements are evaluated left to right, so the plot is generated before the caption.
    return generate_random_plot(figure_area), generate_random_image_caption(caption_area)
|
||||
|
||||
|
||||
# TODO: deduplicate with generate_random_table_with_caption
|
||||
def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    """Populate `rectangle` with a random table and an accompanying table caption.

    The rectangle is divided by `split_into_figure_and_caption`. The first part is
    passed to `generate_recursive_random_table` (with `double_rule` taken from
    `probably()`), the second to `generate_random_table_caption`.

    Args:
        rectangle: Area to be divided into a table region and a caption region.

    Returns:
        A `(table, caption)` tuple holding the results of the two generator calls.
    """
    table_area, caption_area = split_into_figure_and_caption(rectangle)
    use_double_rule = probably()
    table = generate_recursive_random_table(table_area, double_rule=use_double_rule)
    caption = generate_random_table_caption(caption_area)
    return table, caption
|
||||
|
||||
|
||||
def split_into_figure_and_caption(rectangle: Rectangle):
    """Divide `rectangle` along its height into a figure box and a caption box.

    A gap fraction is drawn with `rnd.uniform(0, 0.03)` and a split fraction with
    `rnd.uniform(0.5, 0.9)`, both relative to `rectangle.height`. The gap is centred
    on the split position and belongs to neither box.

    Args:
        rectangle: Area to divide; its `x1`, `y1`, `x2`, `y2` and `height` are read.

    Returns:
        A `(figure_box, caption_box)` tuple of `Rectangle`s. Both span `x1`..`x2`;
        the figure box starts at `y1`, the caption box ends at `y2`.
    """
    # Keep the draw order (gap, then split) so the sequence of random draws is unchanged.
    gap_fraction = rnd.uniform(0, 0.03)
    split_fraction = rnd.uniform(0.5, 0.9)
    half_gap = gap_fraction / 2

    figure_end = rectangle.y1 + rectangle.height * (split_fraction - half_gap)
    caption_start = rectangle.y1 + rectangle.height * (split_fraction + half_gap)

    figure_box = Rectangle(rectangle.x1, rectangle.y1, rectangle.x2, figure_end)
    caption_box = Rectangle(rectangle.x1, caption_start, rectangle.x2, rectangle.y2)
    return figure_box, caption_box
|
||||
|
||||
|
||||
def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    """Build a `TextBlock` over `rectangle` and have it generate random text.

    The font is picked from the system fonts matching "serif" or "sans-serif" while
    excluding bold, mono, italic, oblique and cursive variants.

    Args:
        rectangle: Area the block covers; its `coords` are unpacked into `TextBlock`.
        n_sentences: Forwarded to `TextBlock.generate_random_text`.

    Returns:
        The populated `TextBlock`.
    """
    body_font = pick_random_font_available_on_system(
        includes=("serif", "sans-serif"),
        excludes=("bold", "mono", "italic", "oblique", "cursive"),
    )
    # TODO: De-hardcode font size... Seems to have no effect on top of that
    text_block = TextBlock(*rectangle.coords, font=body_font, font_size=30)

    # TODO: Refactor
    # Carry over existing content only when the input rectangle already holds some.
    if isinstance(rectangle, ContentRectangle):
        text_block.content = rectangle.content
    else:
        text_block.content = None

    text_block.generate_random_text(rectangle, n_sentences)
    return text_block
|
||||
|
||||
|
||||
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    """Generate a caption for `rectangle` that starts with "Fig <number>".

    The number comes from `rnd.randint(1, 20)`; everything else is delegated to
    `generate_random_caption`.
    """
    figure_number = rnd.randint(1, 20)
    caption_prefix = f"Fig {figure_number}"
    return generate_random_caption(rectangle, caption_prefix)
|
||||
|
||||
|
||||
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    """Generate a caption for `rectangle` that starts with "Tabl <number>".

    The number comes from `rnd.randint(1, 20)`; everything else is delegated to
    `generate_random_caption`.
    """
    table_number = rnd.randint(1, 20)
    caption_prefix = f"Tabl {table_number}"
    return generate_random_caption(rectangle, caption_prefix)
|
||||
|
||||
|
||||
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    """Build a caption `TextBlock` over `rectangle` and have it generate random text.

    The block uses a `CaptionGenerator` seeded with `caption_start` and a font picked
    from the system fonts matching "italic" while excluding bold and mono variants.

    Args:
        rectangle: Area the caption covers; its `coords` are unpacked into `TextBlock`.
        caption_start: Passed to `CaptionGenerator` as `caption_start`.
        n_sentences: Forwarded to `TextBlock.generate_random_text`.

    Returns:
        The populated `TextBlock`.
    """
    caption_block = TextBlock(
        *rectangle.coords,
        text_generator=CaptionGenerator(caption_start=caption_start),
        font=pick_random_font_available_on_system(includes=("italic",), excludes=("bold", "mono")),
        font_size=100,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )

    # TODO: Refactor
    # Carry over existing content only when the input rectangle already holds some.
    existing_content = None
    if isinstance(rectangle, ContentRectangle):
        existing_content = rectangle.content
    caption_block.content = existing_content

    caption_block.generate_random_text(rectangle, n_sentences)
    return caption_block
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user