diff --git a/synthesis/segment/text_block/__init__.py b/synthesis/segment/text_block/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/synthesis/segment/text_block/text_block.py b/synthesis/segment/text_block/text_block.py new file mode 100644 index 0000000..b76de5e --- /dev/null +++ b/synthesis/segment/text_block/text_block.py @@ -0,0 +1,112 @@ +import abc +import textwrap +from typing import List + +from PIL import Image, ImageDraw, ImageFont +from faker import Faker +from funcy import first, identity, iterate, take, last, rest + +from cv_analysis.utils import star, conj +from cv_analysis.utils.image_operations import superimpose +from cv_analysis.utils.rectangle import Rectangle +from synthesis.random import rnd +from synthesis.segment.content_rectangle import ContentRectangle +from synthesis.text.font import pick_random_mono_space_font_available_on_system +from synthesis.text.line_formatter.identity import IdentityLineFormatter +from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter + + +class TextBlock(ContentRectangle): + def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None): + super().__init__(x1, y1, x2, y2) + self.font = font or ImageFont.load_default() # pick_random_font_available_on_system(size=font_size) + self.text_generator = text_generator or ParagraphGenerator() + + def __call__(self, *args, **kwargs): + pass + + def generate_random_text(self, rectangle: Rectangle, n_sentences=3000): + lines = self.text_generator(rectangle, n_sentences) + image = write_lines_to_image(lines, rectangle, self.font) + return self.__put_content(image) + + def put_text(self, text: str, rectangle: Rectangle): + + text_width, text_height = self.font.getsize(text) + + width_delta = text_width - rectangle.width + height_delta = text_height - rectangle.height + + image = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0)) + + if width_delta > 0 or height_delta > 0: + image = image.resize((int(rectangle.width * 0.9), text_height)) + + draw = ImageDraw.Draw(image) + draw.text((0, 0), text, font=self.font, fill=(0, 0, 0, 255)) + return self.__put_content(image) + + def __put_content(self, image: Image.Image): + self.content = image if not self.content else superimpose(self.content, image) + assert self.content.mode == "RGBA" + return self + + +class TextBlockGenerator(abc.ABC): + pass + + +class ParagraphGenerator(TextBlockGenerator): + def __init__(self): + self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5)) + + def __call__(self, rectangle, n_sentences): + return self.generate_paragraph(rectangle, n_sentences) + + def generate_paragraph(self, rectangle, n_sentences): + lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) + return lines + + +def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image: + def write_line(line, line_number): + draw.text((0, line_number * text_size), line, font=font, fill=(0, 0, 0, 255)) + + font = font or pick_random_mono_space_font_available_on_system() + + image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0)) + draw = ImageDraw.Draw(image) + text_size = draw.textsize(first(lines), font=font)[1] + + for line_number, line in enumerate(lines): + write_line(line, line_number) + + return image + + +def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]: + text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None) + unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False) + # each iteration of the line formatter function formats one more line and adds it to the back of the list + formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True)) + # hence do as many iterations as there are lines in the rectangle + lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator) + # and then take the lines from the last iteration of the function + formatted_lines, _ = last(lines_per_iteration) + + return formatted_lines + + +class CaptionGenerator(TextBlockGenerator): + def __init__(self, caption_start=None): + self.line_formatter = IdentityLineFormatter() + self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}" + + def __call__(self, rectangle, n_sentences): + return self.generate_paragraph(rectangle, n_sentences) + + def generate_paragraph(self, rectangle, n_sentences): + lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) + first_line_modified = f"{self.caption_start}.: {first(lines)}" + lines = conj(first_line_modified, rest(lines)) + return lines diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index 6bd7049..5078fdd 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -1,9 +1,7 @@ -import abc import io import itertools import random import sys -import textwrap from copy import deepcopy from enum import Enum from functools import lru_cache, partial @@ -13,14 +11,12 @@ from typing import Tuple, Iterable, List import blend_modes import numpy as np import pytest -from PIL import Image, ImageFont, ImageDraw, ImageEnhance +from PIL import Image, ImageDraw, ImageEnhance from PIL.Image import Transpose -from faker import Faker from loguru import logger from matplotlib import pyplot as plt from matplotlib.colors import ListedColormap -from cv_analysis.utils import star, conj from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose, compute_pasting_coordinates from cv_analysis.utils.merging import merge_related_rectangles @@ -30,9 +26,8 @@ from synthesis.partitioner.two_column import TwoColumnPagePartitioner from synthesis.random import rnd from synthesis.segment.content_rectangle import ContentRectangle from synthesis.segment.random_content_rectangle import RandomContentRectangle -from synthesis.text.font import pick_random_mono_space_font_available_on_system, pick_random_font_available_on_system -from synthesis.text.line_formatter.identity import IdentityLineFormatter -from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter +from synthesis.segment.text_block.text_block import TextBlock, CaptionGenerator +from synthesis.text.font import pick_random_font_available_on_system from synthesis.text.text import generate_random_words, generate_random_number logger.remove() @@ -44,11 +39,6 @@ from funcy import ( compose, identity, lmap, - first, - iterate, - take, - last, - rest, lsplit, lfilter, repeatedly, @@ -888,89 +878,6 @@ def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle: return block -def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image: - def write_line(line, line_number): - draw.text((0, line_number * text_size), line, font=font, fill=(0, 0, 0, 255)) - - font = font or pick_random_mono_space_font_available_on_system() - - image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0)) - draw = ImageDraw.Draw(image) - text_size = draw.textsize(first(lines), font=font)[1] - - for line_number, line in enumerate(lines): - write_line(line, line_number) - - return image - - -class TextBlockGenerator(abc.ABC): - pass - - -class ParagraphGenerator(TextBlockGenerator): - def __init__(self): - self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5)) - - def __call__(self, rectangle, n_sentences): - return self.generate_paragraph(rectangle, n_sentences) - - def generate_paragraph(self, rectangle, n_sentences): - lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) - return lines - - -class CaptionGenerator(TextBlockGenerator): - def __init__(self, caption_start=None): - self.line_formatter = IdentityLineFormatter() - self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}" - - def __call__(self, rectangle, n_sentences): - return self.generate_paragraph(rectangle, n_sentences) - - def generate_paragraph(self, rectangle, n_sentences): - lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences) - first_line_modified = f"{self.caption_start}.: {first(lines)}" - lines = conj(first_line_modified, rest(lines)) - return lines - - -class TextBlock(ContentRectangle): - def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None): - super().__init__(x1, y1, x2, y2) - self.font = font or ImageFont.load_default() # pick_random_font_available_on_system(size=font_size) - self.text_generator = text_generator or ParagraphGenerator() - - def __call__(self, *args, **kwargs): - pass - - def generate_random_text(self, rectangle: Rectangle, n_sentences=3000): - lines = self.text_generator(rectangle, n_sentences) - image = write_lines_to_image(lines, rectangle, self.font) - return self.__put_content(image) - - def put_text(self, text: str, rectangle: Rectangle): - - text_width, text_height = self.font.getsize(text) - - width_delta = text_width - rectangle.width - height_delta = text_height - rectangle.height - - image = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0)) - - if width_delta > 0 or height_delta > 0: - image = image.resize((int(rectangle.width * 0.9), text_height)) - - draw = ImageDraw.Draw(image) - draw.text((0, 0), text, font=self.font, fill=(0, 0, 0, 255)) - return self.__put_content(image) - - def __put_content(self, image: Image.Image): - self.content = image if not self.content else superimpose(self.content, image) - assert self.content.mode == "RGBA" - return self - - class RandomPageNumber(TextBlock): def __init__(self, x1, y1, x2, y2): super().__init__(x1, y1, x2, y2) @@ -982,19 +889,6 @@ class RandomPageNumber(TextBlock): self.location_coordinates = self.location_to_coordinates(self.pick_location()) -def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]: - text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None) - unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False) - # each iteration of the line formatter function formats one more line and adds it to the back of the list - formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True)) - # hence do as many iterations as there are lines in the rectangle - lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator) - # and then take the lines from the last iteration of the function - formatted_lines, _ = last(lines_per_iteration) - - return formatted_lines - - def paste_content(page, content_box: ContentRectangle): assert content_box.content.mode == "RGBA" page.paste(content_box.content, (content_box.x1, content_box.y1), content_box.content)