diff --git a/synthesis/segment/text_block/text_block.py b/synthesis/segment/text_block/text_block.py index b76de5e..cdd91fd 100644 --- a/synthesis/segment/text_block/text_block.py +++ b/synthesis/segment/text_block/text_block.py @@ -1,19 +1,13 @@ -import abc -import textwrap from typing import List from PIL import Image, ImageDraw, ImageFont -from faker import Faker -from funcy import first, identity, iterate, take, last, rest +from funcy import first -from cv_analysis.utils import star, conj from cv_analysis.utils.image_operations import superimpose from cv_analysis.utils.rectangle import Rectangle -from synthesis.random import rnd from synthesis.segment.content_rectangle import ContentRectangle +from synthesis.segment.text_block_generator.paragraph import ParagraphGenerator from synthesis.text.font import pick_random_mono_space_font_available_on_system -from synthesis.text.line_formatter.identity import IdentityLineFormatter -from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter class TextBlock(ContentRectangle): @@ -52,22 +46,6 @@ class TextBlock(ContentRectangle): return self -class TextBlockGenerator(abc.ABC): - pass - - -class ParagraphGenerator(TextBlockGenerator): - def __init__(self): - self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5)) - - def __call__(self, rectangle, n_sentences): - return self.generate_paragraph(rectangle, n_sentences) - - def generate_paragraph(self, rectangle, n_sentences): - lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) - return lines - - def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image: def write_line(line, line_number): draw.text((0, line_number * text_size), line, font=font, fill=(0, 0, 0, 255)) @@ -82,31 +60,3 @@ def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> I write_line(line, line_number) return image - - -def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]: - text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None) - unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False) - # each iteration of the line formatter function formats one more line and adds it to the back of the list - formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True)) - # hence do as many iterations as there are lines in the rectangle - lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator) - # and then take the lines from the last iteration of the function - formatted_lines, _ = last(lines_per_iteration) - - return formatted_lines - - -class CaptionGenerator(TextBlockGenerator): - def __init__(self, caption_start=None): - self.line_formatter = IdentityLineFormatter() - self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}" - - def __call__(self, rectangle, n_sentences): - return self.generate_paragraph(rectangle, n_sentences) - - def generate_paragraph(self, rectangle, n_sentences): - lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) - first_line_modified = f"{self.caption_start}.: {first(lines)}" - lines = conj(first_line_modified, rest(lines)) - return lines diff --git a/synthesis/segment/text_block_generator/__init__.py b/synthesis/segment/text_block_generator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/synthesis/segment/text_block_generator/caption.py b/synthesis/segment/text_block_generator/caption.py new file mode 100644 index 0000000..74863af --- /dev/null +++ b/synthesis/segment/text_block_generator/caption.py @@ -0,0 +1,22 @@ +from funcy import first, rest + +from cv_analysis.utils import conj +from synthesis.random import rnd +from synthesis.segment.text_block_generator.paragraph import generate_random_text_lines +from synthesis.segment.text_block_generator.text_block_generator import TextBlockGenerator +from synthesis.text.line_formatter.identity import IdentityLineFormatter + + +class CaptionGenerator(TextBlockGenerator): + def __init__(self, caption_start=None): + self.line_formatter = IdentityLineFormatter() + self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}" + + def __call__(self, rectangle, n_sentences): + return self.generate_paragraph(rectangle, n_sentences) + + def generate_paragraph(self, rectangle, n_sentences): + lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) + first_line_modified = f"{self.caption_start}.: {first(lines)}" + lines = conj(first_line_modified, rest(lines)) + return lines diff --git a/synthesis/segment/text_block_generator/paragraph.py b/synthesis/segment/text_block_generator/paragraph.py new file mode 100644 index 0000000..c54c13f --- /dev/null +++ b/synthesis/segment/text_block_generator/paragraph.py @@ -0,0 +1,36 @@ +import textwrap +from typing import List + +from faker import Faker +from funcy import identity, iterate, take, last + +from cv_analysis.utils import star +from cv_analysis.utils.rectangle import Rectangle +from synthesis.random import rnd +from synthesis.segment.text_block_generator.text_block_generator import TextBlockGenerator +from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter + + +class ParagraphGenerator(TextBlockGenerator): + def __init__(self): + self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5)) + + def __call__(self, rectangle, n_sentences): + return self.generate_paragraph(rectangle, n_sentences) + + def generate_paragraph(self, rectangle, n_sentences): + lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) + return lines + + +def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]: + text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None) + unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False) + # each iteration of the line formatter function formats one more line and adds it to the back of the list + formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True)) + # hence do as many iterations as there are lines in the rectangle + lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator) + # and then take the lines from the last iteration of the function + formatted_lines, _ = last(lines_per_iteration) + + return formatted_lines diff --git a/synthesis/segment/text_block_generator/text_block_generator.py b/synthesis/segment/text_block_generator/text_block_generator.py new file mode 100644 index 0000000..32a87b2 --- /dev/null +++ b/synthesis/segment/text_block_generator/text_block_generator.py @@ -0,0 +1,5 @@ +import abc + + +class TextBlockGenerator(abc.ABC): + pass diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index 5078fdd..5b5fce2 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -26,7 +26,8 @@ from synthesis.partitioner.two_column import TwoColumnPagePartitioner from synthesis.random import rnd from synthesis.segment.content_rectangle import ContentRectangle from synthesis.segment.random_content_rectangle import RandomContentRectangle -from synthesis.segment.text_block.text_block import TextBlock, CaptionGenerator +from synthesis.segment.text_block.text_block import TextBlock +from synthesis.segment.text_block_generator.caption import CaptionGenerator from synthesis.text.font import pick_random_font_available_on_system from synthesis.text.text import generate_random_words, generate_random_number