From e831ab1382bc47ff9e2aff5487916b131289f628 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Mon, 16 Jan 2023 17:17:50 +0100 Subject: [PATCH] [WIP] random text segments --- cv_analysis/utils/rectangle.py | 4 ++ test/fixtures/page_generation/page.py | 73 +++++++++++++++++++-------- 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/cv_analysis/utils/rectangle.py b/cv_analysis/utils/rectangle.py index b2d8834..5e7ce4e 100644 --- a/cv_analysis/utils/rectangle.py +++ b/cv_analysis/utils/rectangle.py @@ -47,6 +47,10 @@ class Rectangle: def height(self): return abs(self.y2 - self.y1) + @property + def coords(self): + return [self.x1, self.y1, self.x2, self.y2] + def __hash__(self): return hash((self.x1, self.y1, self.x2, self.y2)) diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index b3d0010..0f9431b 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -11,6 +11,8 @@ from PIL import Image, ImageOps, ImageFont, ImageDraw from PIL.Image import Transpose from faker import Faker +from cv_analysis.utils.conversion import rectangle_to_box + Image_t = Union[Image.Image, np.ndarray] # # transform = A.Compose( @@ -68,7 +70,7 @@ Image_t = Union[Image.Image, np.ndarray] # ], # p=0.5, # ) -from funcy import juxt, compose, identity, lflatten, lmap +from funcy import juxt, compose, identity, lflatten, lmap, first from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR @@ -344,27 +346,56 @@ class ContentGenerator: return lmap(generate_random_text_block, boxes) +class RandomTextBlock(ContentRectangle): + def __init__(self, x1, y1, x2, y2): + super().__init__(x1, y1, x2, y2) + self.blank_line_percentage = random.uniform(0, 0.5) + self.font = ImageFont.load_default() + + def generate_random_text(self, rectangle: Rectangle): + + image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0)) + draw = ImageDraw.Draw(image) + text = Faker().paragraph(nb_sentences=1000, variable_nb_sentences=False, ext_word_list=None) + + wrapped_text = textwrap.wrap(text, width=image.width, break_long_words=False) + text_size = draw.textsize(first(wrapped_text), font=self.font)[1] + + last_full = True + for i, line in enumerate(wrapped_text): + if random.random() < self.blank_line_percentage and last_full: + line = self.truncate_line(line) + last_full = False + else: + last_full = True + + draw.text((0, i * text_size), line, font=self.font, fill=(0, 0, 0, 200)) + + self.content = image + + def f(self, last_full, line): + if random.random() < self.blank_line_percentage and last_full: + line = self.truncate_line(line) + last_full = False + else: + last_full = True + return last_full, line + + def format_line(self, line, full=True): + line = self.truncate_line(line) if not full else line + return line, full + + def truncate_line(self, line: str): + n_trailing_words = random.randint(0, 4) + line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "") + line = line + ".\n" if line else line + return line + + def generate_random_text_block(rectangle: Rectangle) -> ContentRectangle: - - font = ImageFont.load_default() - image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0)) - draw = ImageDraw.Draw(image) - fake = Faker() - text = fake.paragraph(nb_sentences=300, variable_nb_sentences=True, ext_word_list=None) - - wrapped_text = textwrap.wrap(text, width=image.width, break_long_words=False) - - # Set the initial position for the text - x, y = 0, 0 - - # Iterate through the lines of wrapped text - for line in wrapped_text: - # Draw the line of text at the current position - draw.text((x, y), line, font=font, fill=(0, 0, 0, 200)) - # Update the y-coordinate for the next line - y += draw.textsize(line, font=font)[1] - - return ContentRectangle(rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y2, content=image) + block = RandomTextBlock(*rectangle.coords) + block.generate_random_text(rectangle) + return block def paste_content(page, content_box: ContentRectangle):