[WIP] random text segments

This commit is contained in:
Matthias Bisping 2023-01-16 17:17:50 +01:00
parent 6fead2d9b9
commit e831ab1382
2 changed files with 56 additions and 21 deletions

View File

@ -47,6 +47,10 @@ class Rectangle:
def height(self):
    """Return the vertical extent of the rectangle as a non-negative value."""
    bottom, top = self.y2, self.y1
    return abs(bottom - top)
@property
def coords(self):
    """Return the corner coordinates as the list ``[x1, y1, x2, y2]``."""
    corner_names = ("x1", "y1", "x2", "y2")
    return [getattr(self, name) for name in corner_names]
def __hash__(self):
    """Hash the rectangle by the tuple of its four corner coordinates."""
    corner_key = (self.x1, self.y1, self.x2, self.y2)
    return hash(corner_key)

View File

@ -11,6 +11,8 @@ from PIL import Image, ImageOps, ImageFont, ImageDraw
from PIL.Image import Transpose
from faker import Faker
from cv_analysis.utils.conversion import rectangle_to_box
# Type alias: an image given either as a PIL image or as a NumPy array.
Image_t = Union[Image.Image, np.ndarray]
#
# transform = A.Compose(
@ -68,7 +70,7 @@ Image_t = Union[Image.Image, np.ndarray]
# ],
# p=0.5,
# )
from funcy import juxt, compose, identity, lflatten, lmap
from funcy import juxt, compose, identity, lflatten, lmap, first
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
@ -344,27 +346,56 @@ class ContentGenerator:
return lmap(generate_random_text_block, boxes)
class RandomTextBlock(ContentRectangle):
    """Content rectangle that renders itself as a block of random paragraph text.

    The rendered image has a fully transparent background and semi-opaque black
    text. Lines are shortened at random (never two in a row) so the block does
    not look like one uniform slab of text.
    """

    def __init__(self, x1, y1, x2, y2):
        super().__init__(x1, y1, x2, y2)
        # Probability that a line following a full line is shortened.
        self.blank_line_percentage = random.uniform(0, 0.5)
        self.font = ImageFont.load_default()

    def generate_random_text(self, rectangle: Rectangle):
        """Render random text into an image sized like ``rectangle``.

        The resulting RGBA image is stored in ``self.content``; nothing is
        returned.

        NOTE(review): ``ImageDraw.textsize`` is deprecated in newer Pillow
        releases -- confirm the pinned Pillow version before upgrading.
        """
        image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
        draw = ImageDraw.Draw(image)

        text = Faker().paragraph(nb_sentences=1000, variable_nb_sentences=False, ext_word_list=None)

        # textwrap measures ``width`` in characters, while the image width is in
        # pixels, so convert before wrapping; otherwise lines overflow the image.
        chars_per_line = self._chars_per_line(draw, text, image.width)
        wrapped_text = textwrap.wrap(text, width=chars_per_line, break_long_words=False)

        line_height = draw.textsize(first(wrapped_text), font=self.font)[1]

        last_full = True
        for i, line in enumerate(wrapped_text):
            y = i * line_height
            if y >= image.height:
                # Remaining lines would land below the image and never be visible.
                break
            last_full, line = self.f(last_full, line)
            draw.text((0, y), line, font=self.font, fill=(0, 0, 0, 200))

        self.content = image

    def _chars_per_line(self, draw, text, pixel_width):
        """Estimate how many characters of ``text`` fit into ``pixel_width`` pixels."""
        sample = text[:200]
        if not sample:
            return max(1, pixel_width)
        sample_width = draw.textsize(sample, font=self.font)[0]
        if sample_width <= 0:
            return max(1, pixel_width)
        average_char_width = sample_width / len(sample)
        # textwrap requires a strictly positive width.
        return max(1, int(pixel_width / average_char_width))

    def f(self, last_full, line):
        """Randomly shorten ``line``, but only if the previous line was full.

        Returns the tuple ``(is_full, line)`` where ``is_full`` tells whether the
        returned line was left untouched.
        """
        if random.random() < self.blank_line_percentage and last_full:
            line = self.truncate_line(line)
            last_full = False
        else:
            last_full = True
        return last_full, line

    def format_line(self, line, full=True):
        """Return ``(line, full)``, shortening ``line`` first when ``full`` is false."""
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        """Shorten ``line`` to a few of its trailing words.

        Between zero and four words are taken from the end of the line (the very
        last word is always left out), periods are removed, and ``".\\n"`` is
        appended. If no words are selected, the empty string is returned.
        """
        n_trailing_words = random.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line
def generate_random_text_block(rectangle: Rectangle) -> ContentRectangle:
    """Create a text block covering ``rectangle`` and fill it with random text.

    The block is built from the rectangle's coordinates and its content image
    is rendered by ``RandomTextBlock.generate_random_text``.
    """
    block = RandomTextBlock(*rectangle.coords)
    block.generate_random_text(rectangle)
    return block
def paste_content(page, content_box: ContentRectangle):