Refactoring: Move

Move text block into its own module
This commit is contained in:
Matthias Bisping 2023-02-01 17:24:54 +01:00
parent cef97b33f9
commit e258df899f
3 changed files with 115 additions and 109 deletions

View File

View File

@ -0,0 +1,112 @@
import abc
import textwrap
from typing import List
from PIL import Image, ImageDraw, ImageFont
from faker import Faker
from funcy import first, identity, iterate, take, last, rest
from cv_analysis.utils import star, conj
from cv_analysis.utils.image_operations import superimpose
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.text.font import pick_random_mono_space_font_available_on_system
from synthesis.text.line_formatter.identity import IdentityLineFormatter
from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter
class TextBlock(ContentRectangle):
    """Content rectangle whose content is text rendered onto a transparent RGBA image.

    Every rendering method draws black text on a fresh transparent canvas and
    layers it onto ``self.content`` (via ``superimpose`` once content exists),
    returning ``self`` so calls can be chained.
    """

    def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
        """Create an empty text block.

        Args:
            x1, y1, x2, y2: Forwarded unchanged to ``ContentRectangle``.
            text_generator: Callable ``(rectangle, n_sentences) -> lines`` used by
                ``generate_random_text``; defaults to a fresh ``ParagraphGenerator``.
            font: PIL font used for drawing; defaults to ``ImageFont.load_default()``.
            font_size: Accepted for interface compatibility but currently unused.
        """
        super().__init__(x1, y1, x2, y2)
        # TODO: honour font_size, e.g. via pick_random_font_available_on_system(size=font_size)
        self.font = font or ImageFont.load_default()
        self.text_generator = text_generator or ParagraphGenerator()

    def __call__(self, *args, **kwargs):
        """Placeholder: accepts any arguments, does nothing and returns ``None``."""
        pass

    def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
        """Render generated text lines on a canvas of ``rectangle``'s size and add it to the content.

        Args:
            rectangle: Passed to the text generator and used for the canvas size.
            n_sentences: Passed to the text generator.

        Returns:
            ``self``.
        """
        lines = self.text_generator(rectangle, n_sentences)
        image = write_lines_to_image(lines, rectangle, self.font)
        return self.__put_content(image)

    def put_text(self, text: str, rectangle: Rectangle):
        """Render ``text`` as a single line and add it to the content.

        Args:
            text: The text to draw.
            rectangle: Bounds the text is compared against.

        Returns:
            ``self``.
        """
        text_width, text_height = self._text_size(text)

        width_delta = text_width - rectangle.width
        height_delta = text_height - rectangle.height

        image = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))

        if width_delta > 0 or height_delta > 0:
            # NOTE(review): the still-blank canvas is resized *before* drawing, so
            # oversized text is clipped at 90% of the rectangle width rather than
            # scaled down, and the height is left untouched. Kept as-is.
            image = image.resize((int(rectangle.width * 0.9), text_height))

        draw = ImageDraw.Draw(image)
        draw.text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
        return self.__put_content(image)

    def _text_size(self, text: str):
        """Return the ``(width, height)`` in pixels needed to draw ``text`` at the origin."""
        if hasattr(self.font, "getbbox"):
            # ``getsize`` is deprecated since Pillow 9.2 and removed in 10.0. The
            # right/bottom edges of the bounding box are the extent of text drawn at (0, 0).
            _, _, right, bottom = self.font.getbbox(text)
            return int(right), int(bottom)
        return self.font.getsize(text)

    def __put_content(self, image: Image.Image):
        """Set ``image`` as the content, or superimpose it on the existing content.

        Assumes ``ContentRectangle`` initialises ``self.content`` to a falsy
        value -- TODO confirm.
        """
        self.content = image if not self.content else superimpose(self.content, image)
        assert self.content.mode == "RGBA"
        return self
class TextBlockGenerator(abc.ABC):
    """Base class for callables that produce the text lines of a text block.

    Generators are invoked as ``generator(rectangle, n_sentences)``. Subclasses
    implement ``generate_paragraph``; calling the generator delegates to it.
    """

    def __call__(self, rectangle, n_sentences):
        """Return the lines produced by ``generate_paragraph`` for the same arguments."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Produce the lines of text for ``rectangle``; must be overridden by subclasses."""
        raise NotImplementedError(f"{type(self).__name__} must implement generate_paragraph")
class ParagraphGenerator(TextBlockGenerator):
    """Text generator that formats random text with a ``ParagraphLineFormatter``."""

    def __init__(self):
        """Create the line formatter with a blank-line percentage drawn from ``rnd.uniform(0, 0.5)``."""
        blank_line_percentage = rnd.uniform(0, 0.5)
        self.line_formatter = ParagraphLineFormatter(blank_line_percentage=blank_line_percentage)

    def __call__(self, rectangle, n_sentences):
        """Shorthand for ``generate_paragraph(rectangle, n_sentences)``."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines for ``rectangle``, formatted by this generator's line formatter."""
        return generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
    """Draw ``lines`` top to bottom in black on a transparent RGBA image.

    Args:
        lines: The text lines to draw, one per row.
        rectangle: Supplies the ``width`` and ``height`` of the image.
        font: PIL font to draw with; a random monospace system font is picked when omitted.

    Returns:
        An RGBA image of the rectangle's size. It is blank when ``lines`` is empty.
    """
    # Materialise once: the first line is measured and then all lines are iterated,
    # which would silently drop the first line if a one-shot iterator were passed in.
    lines = list(lines)

    font = font or pick_random_mono_space_font_available_on_system()
    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
    if not lines:
        # Nothing to measure or draw; previously this crashed while measuring ``None``.
        return image

    draw = ImageDraw.Draw(image)

    # Row pitch is the rendered height of the first line.
    if hasattr(font, "getbbox"):
        # ``ImageDraw.textsize`` is deprecated since Pillow 9.2 and removed in 10.0;
        # the bottom edge of the bounding box is the same measure.
        line_height = int(font.getbbox(lines[0])[3])
    else:
        line_height = draw.textsize(lines[0], font=font)[1]

    for line_number, line in enumerate(lines):
        draw.text((0, line_number * line_height), line, font=font, fill=(0, 0, 0, 255))

    return image
def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
    """Generate random text, wrap it into lines and run the lines through ``line_formatter``.

    Args:
        rectangle: Its ``width`` is used as the wrap width.
            NOTE(review): ``textwrap.wrap`` counts characters, while the same width is
            used elsewhere as an image size -- confirm this is intended.
        line_formatter: Called (through ``star``) on the state ``(lines, True)`` and
            expected to return the next state of the same shape. The default
            ``identity`` means "no formatting".
        n_sentences: Number of sentences requested from Faker.

    Returns:
        The formatted lines; an empty list when the text wraps to no lines.
    """
    text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
    unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)

    # No lines: ``last`` of an empty sequence is None and cannot be unpacked below.
    # Identity formatter: it takes a single argument, so it presumably cannot be applied
    # to the unpacked (lines, flag) state -- and it would leave the lines unchanged anyway.
    if not unformatted_lines or line_formatter is identity:
        return unformatted_lines

    # Each application of the line formatter advances the (lines, flag) state by one step.
    formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True))
    # Take one state per line. The initial state counts as the first one, so the
    # formatter is applied len(unformatted_lines) - 1 times.
    # NOTE(review): confirm this count is what the formatters expect.
    lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator)
    # The lines of the final state are the result.
    formatted_lines, _ = last(lines_per_iteration)

    return formatted_lines
class CaptionGenerator(TextBlockGenerator):
    """Text generator whose first line is prefixed with a caption label such as ``Fig 7.: ``."""

    def __init__(self, caption_start=None):
        """Create the generator.

        Args:
            caption_start: Label placed in front of the first line; defaults to
                ``"Fig N"`` with ``N`` drawn from ``rnd.randint(1, 20)``.
        """
        self.line_formatter = IdentityLineFormatter()
        self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"

    def __call__(self, rectangle, n_sentences):
        """Shorthand for ``generate_paragraph(rectangle, n_sentences)``."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines for ``rectangle`` with the caption label on the first line."""
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        first_line = first(lines)
        if first_line is None:
            # ``first`` yields None for an empty sequence; avoid rendering the text "None".
            first_line = ""
        first_line_modified = f"{self.caption_start}.: {first_line}"
        return conj(first_line_modified, rest(lines))

View File

@ -1,9 +1,7 @@
import abc
import io
import itertools
import random
import sys
import textwrap
from copy import deepcopy
from enum import Enum
from functools import lru_cache, partial
@ -13,14 +11,12 @@ from typing import Tuple, Iterable, List
import blend_modes
import numpy as np
import pytest
from PIL import Image, ImageFont, ImageDraw, ImageEnhance
from PIL import Image, ImageDraw, ImageEnhance
from PIL.Image import Transpose
from faker import Faker
from loguru import logger
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
from cv_analysis.utils import star, conj
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose, compute_pasting_coordinates
from cv_analysis.utils.merging import merge_related_rectangles
@ -30,9 +26,8 @@ from synthesis.partitioner.two_column import TwoColumnPagePartitioner
from synthesis.random import rnd
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.random_content_rectangle import RandomContentRectangle
from synthesis.text.font import pick_random_mono_space_font_available_on_system, pick_random_font_available_on_system
from synthesis.text.line_formatter.identity import IdentityLineFormatter
from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter
from synthesis.segment.text_block.text_block import TextBlock, CaptionGenerator
from synthesis.text.font import pick_random_font_available_on_system
from synthesis.text.text import generate_random_words, generate_random_number
logger.remove()
@ -44,11 +39,6 @@ from funcy import (
compose,
identity,
lmap,
first,
iterate,
take,
last,
rest,
lsplit,
lfilter,
repeatedly,
@ -888,89 +878,6 @@ def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
return block
def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
    """Draw ``lines`` top to bottom in black on a transparent RGBA image.

    Args:
        lines: The text lines to draw, one per row.
        rectangle: Supplies the ``width`` and ``height`` of the image.
        font: PIL font to draw with; a random monospace system font is picked when omitted.

    Returns:
        An RGBA image of the rectangle's size. It is blank when ``lines`` is empty.
    """
    # Materialise once: the first line is measured and then all lines are iterated,
    # which would silently drop the first line if a one-shot iterator were passed in.
    lines = list(lines)

    font = font or pick_random_mono_space_font_available_on_system()
    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
    if not lines:
        # Nothing to measure or draw; previously this crashed while measuring ``None``.
        return image

    draw = ImageDraw.Draw(image)

    # Row pitch is the rendered height of the first line.
    if hasattr(font, "getbbox"):
        # ``ImageDraw.textsize`` is deprecated since Pillow 9.2 and removed in 10.0;
        # the bottom edge of the bounding box is the same measure.
        line_height = int(font.getbbox(lines[0])[3])
    else:
        line_height = draw.textsize(lines[0], font=font)[1]

    for line_number, line in enumerate(lines):
        draw.text((0, line_number * line_height), line, font=font, fill=(0, 0, 0, 255))

    return image
class TextBlockGenerator(abc.ABC):
    """Base class for callables that produce the text lines of a text block.

    Generators are invoked as ``generator(rectangle, n_sentences)``. Subclasses
    implement ``generate_paragraph``; calling the generator delegates to it.
    """

    def __call__(self, rectangle, n_sentences):
        """Return the lines produced by ``generate_paragraph`` for the same arguments."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Produce the lines of text for ``rectangle``; must be overridden by subclasses."""
        raise NotImplementedError(f"{type(self).__name__} must implement generate_paragraph")
class ParagraphGenerator(TextBlockGenerator):
    """Text generator that formats random text with a ``ParagraphLineFormatter``."""

    def __init__(self):
        """Create the line formatter with a blank-line percentage drawn from ``rnd.uniform(0, 0.5)``."""
        blank_line_percentage = rnd.uniform(0, 0.5)
        self.line_formatter = ParagraphLineFormatter(blank_line_percentage=blank_line_percentage)

    def __call__(self, rectangle, n_sentences):
        """Shorthand for ``generate_paragraph(rectangle, n_sentences)``."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines for ``rectangle``, formatted by this generator's line formatter."""
        return generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
class CaptionGenerator(TextBlockGenerator):
    """Text generator whose first line is prefixed with a caption label such as ``Fig 7.: ``."""

    def __init__(self, caption_start=None):
        """Create the generator.

        Args:
            caption_start: Label placed in front of the first line; defaults to
                ``"Fig N"`` with ``N`` drawn from ``rnd.randint(1, 20)``.
        """
        self.line_formatter = IdentityLineFormatter()
        self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"

    def __call__(self, rectangle, n_sentences):
        """Shorthand for ``generate_paragraph(rectangle, n_sentences)``."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Return random text lines for ``rectangle`` with the caption label on the first line."""
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        first_line = first(lines)
        if first_line is None:
            # ``first`` yields None for an empty sequence; avoid rendering the text "None".
            first_line = ""
        first_line_modified = f"{self.caption_start}.: {first_line}"
        return conj(first_line_modified, rest(lines))
class TextBlock(ContentRectangle):
    """Content rectangle whose content is text rendered onto a transparent RGBA image.

    Every rendering method draws black text on a fresh transparent canvas and
    layers it onto ``self.content`` (via ``superimpose`` once content exists),
    returning ``self`` so calls can be chained.
    """

    def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
        """Create an empty text block.

        Args:
            x1, y1, x2, y2: Forwarded unchanged to ``ContentRectangle``.
            text_generator: Callable ``(rectangle, n_sentences) -> lines`` used by
                ``generate_random_text``; defaults to a fresh ``ParagraphGenerator``.
            font: PIL font used for drawing; defaults to ``ImageFont.load_default()``.
            font_size: Accepted for interface compatibility but currently unused.
        """
        super().__init__(x1, y1, x2, y2)
        # TODO: honour font_size, e.g. via pick_random_font_available_on_system(size=font_size)
        self.font = font or ImageFont.load_default()
        self.text_generator = text_generator or ParagraphGenerator()

    def __call__(self, *args, **kwargs):
        """Placeholder: accepts any arguments, does nothing and returns ``None``."""
        pass

    def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
        """Render generated text lines on a canvas of ``rectangle``'s size and add it to the content.

        Args:
            rectangle: Passed to the text generator and used for the canvas size.
            n_sentences: Passed to the text generator.

        Returns:
            ``self``.
        """
        lines = self.text_generator(rectangle, n_sentences)
        image = write_lines_to_image(lines, rectangle, self.font)
        return self.__put_content(image)

    def put_text(self, text: str, rectangle: Rectangle):
        """Render ``text`` as a single line and add it to the content.

        Args:
            text: The text to draw.
            rectangle: Bounds the text is compared against.

        Returns:
            ``self``.
        """
        text_width, text_height = self._text_size(text)

        width_delta = text_width - rectangle.width
        height_delta = text_height - rectangle.height

        image = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))

        if width_delta > 0 or height_delta > 0:
            # NOTE(review): the still-blank canvas is resized *before* drawing, so
            # oversized text is clipped at 90% of the rectangle width rather than
            # scaled down, and the height is left untouched. Kept as-is.
            image = image.resize((int(rectangle.width * 0.9), text_height))

        draw = ImageDraw.Draw(image)
        draw.text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
        return self.__put_content(image)

    def _text_size(self, text: str):
        """Return the ``(width, height)`` in pixels needed to draw ``text`` at the origin."""
        if hasattr(self.font, "getbbox"):
            # ``getsize`` is deprecated since Pillow 9.2 and removed in 10.0. The
            # right/bottom edges of the bounding box are the extent of text drawn at (0, 0).
            _, _, right, bottom = self.font.getbbox(text)
            return int(right), int(bottom)
        return self.font.getsize(text)

    def __put_content(self, image: Image.Image):
        """Set ``image`` as the content, or superimpose it on the existing content.

        Assumes ``ContentRectangle`` initialises ``self.content`` to a falsy
        value -- TODO confirm.
        """
        self.content = image if not self.content else superimpose(self.content, image)
        assert self.content.mode == "RGBA"
        return self
class RandomPageNumber(TextBlock):
def __init__(self, x1, y1, x2, y2):
super().__init__(x1, y1, x2, y2)
@ -982,19 +889,6 @@ class RandomPageNumber(TextBlock):
self.location_coordinates = self.location_to_coordinates(self.pick_location())
def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
    """Generate random text, wrap it into lines and run the lines through ``line_formatter``.

    Args:
        rectangle: Its ``width`` is used as the wrap width.
            NOTE(review): ``textwrap.wrap`` counts characters, while the same width is
            used elsewhere as an image size -- confirm this is intended.
        line_formatter: Called (through ``star``) on the state ``(lines, True)`` and
            expected to return the next state of the same shape. The default
            ``identity`` means "no formatting".
        n_sentences: Number of sentences requested from Faker.

    Returns:
        The formatted lines; an empty list when the text wraps to no lines.
    """
    text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
    unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)

    # No lines: ``last`` of an empty sequence is None and cannot be unpacked below.
    # Identity formatter: it takes a single argument, so it presumably cannot be applied
    # to the unpacked (lines, flag) state -- and it would leave the lines unchanged anyway.
    if not unformatted_lines or line_formatter is identity:
        return unformatted_lines

    # Each application of the line formatter advances the (lines, flag) state by one step.
    formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True))
    # Take one state per line. The initial state counts as the first one, so the
    # formatter is applied len(unformatted_lines) - 1 times.
    # NOTE(review): confirm this count is what the formatters expect.
    lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator)
    # The lines of the final state are the result.
    formatted_lines, _ = last(lines_per_iteration)

    return formatted_lines
def paste_content(page, content_box: ContentRectangle):
assert content_box.content.mode == "RGBA"
page.paste(content_box.content, (content_box.x1, content_box.y1), content_box.content)