diff --git a/poetry.lock b/poetry.lock index 91ccce6..231ee3b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -776,6 +776,17 @@ python-versions = ">=3.7" [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "faker" +version = "16.4.0" +description = "Faker is a Python package that generates fake data for you." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +python-dateutil = ">=2.4" + [[package]] name = "filelock" version = "3.9.0" @@ -2195,7 +2206,7 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [metadata] lock-version = "1.1" python-versions = "~3.8" -content-hash = "87e394dd1a2a230f27fec0d1e634dc4c398fbebe5ae1c3baceee9c93d190945b" +content-hash = "4d220326bd29d9f6b306adc71fe66ec201ca74fc7d277b052195953f8a063037" [metadata.files] aiohttp = [ @@ -2767,6 +2778,10 @@ exceptiongroup = [ {file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"}, {file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"}, ] +faker = [ + {file = "Faker-16.4.0-py3-none-any.whl", hash = "sha256:5420467fad3fa582094057754e5e81326cb1f51ab822bf9df96c077cfb35ae49"}, + {file = "Faker-16.4.0.tar.gz", hash = "sha256:dcffdca8ec9a715982bcd5f53ee688dc4784cd112f9910f8f7183773eb3ec276"}, +] filelock = [ {file = "filelock-3.9.0-py3-none-any.whl", hash = "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d"}, {file = "filelock-3.9.0.tar.gz", hash = "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de"}, diff --git a/pyproject.toml b/pyproject.toml index 081ea5a..82bda59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ pytest = "^7.0.1" [tool.poetry.group.test.dependencies] albumentations = "^1.3.0" +faker = "^16.4.0" [build-system] requires = ["poetry-core"] diff --git a/test/fixtures/page_generation/page.py 
b/test/fixtures/page_generation/page.py
index 5c18655..b3d0010 100644
--- a/test/fixtures/page_generation/page.py
+++ b/test/fixtures/page_generation/page.py
@@ -1,12 +1,15 @@
 import random
+import string
+import textwrap
 from typing import Tuple, Union, Iterable, List
 
 import albumentations as A
 import cv2 as cv
 import numpy as np
 import pytest
-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageFont, ImageDraw
 from PIL.Image import Transpose
+from faker import Faker
 
 Image_t = Union[Image.Image, np.ndarray]
 #
@@ -65,7 +68,7 @@ Image_t = Union[Image.Image, np.ndarray]
 #     ],
 #     p=0.5,
 # )
-from funcy import juxt, compose, identity, lflatten
+from funcy import juxt, compose, identity, lflatten, lmap
 
 from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
 
@@ -316,13 +319,87 @@ def blank_page(texture, texture_fn) -> np.ndarray:
     """Creates a blank page with a given orientation and dpi."""
     page = random_flip(texture)
     page = texture_fn(page)
-    content_box_generator = ContentBoxGenerator()
-    boxes = content_box_generator(page)
-    content_box_generator.draw_boxes(page, boxes)
+    page_partitioner = PagePartitioner()
+    boxes = page_partitioner(page)
+    content_generator = ContentGenerator()
+    boxes = content_generator(boxes)
+    page = paste_contents(page, boxes)
+    page_partitioner.draw_boxes(page, boxes)
+
+    # Pasting and drawing need the PIL image; convert to an array only at the very end
+    page = np.array(page)
     return page
 
 
-class ContentBoxGenerator:
+class ContentRectangle(Rectangle):
+    """A rectangle that additionally carries the content rendered for its area."""
+
+    def __init__(self, x1, y1, x2, y2, content=None):
+        super().__init__(x1, y1, x2, y2)
+        self.content = content
+
+
+class ContentGenerator:
+    """Turns plain rectangles into content rectangles filled with random text."""
+
+    def __call__(self, boxes: Iterable[Rectangle]) -> List[ContentRectangle]:
+        return lmap(generate_random_text_block, boxes)
+
+
+def _measure_text(font, text: str) -> Tuple[float, float]:
+    """Returns the (width, height) of `text` in pixels when rendered with `font`."""
+    # getsize/textsize were removed in Pillow 10 in favour of getbbox; fall back to getsize on old versions
+    if hasattr(font, "getbbox"):
+        _, _, right, bottom = font.getbbox(text)
+        return right, bottom
+    return font.getsize(text)
+
+
+def generate_random_text_block(rectangle: Rectangle) -> ContentRectangle:
+    """Renders random text onto a transparent image with the size of `rectangle`.
+
+    Returns a ContentRectangle with the coordinates of `rectangle` whose content is the rendered RGBA image.
+    """
+    font = ImageFont.load_default()
+    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
+    draw = ImageDraw.Draw(image)
+    fake = Faker()
+    text = fake.paragraph(nb_sentences=300, variable_nb_sentences=True, ext_word_list=None)
+
+    # textwrap counts characters, not pixels: derive the line length from the mean glyph width of the font
+    sample_width, line_height = _measure_text(font, string.ascii_lowercase)
+    mean_char_width = max(1, sample_width / len(string.ascii_lowercase))
+    chars_per_line = max(1, int(image.width / mean_char_width))
+    wrapped_text = textwrap.wrap(text, width=chars_per_line, break_long_words=False)
+
+    x, y = 0, 0
+    for line in wrapped_text:
+        # Anything starting below the bottom edge would be clipped away entirely
+        if y >= image.height:
+            break
+        draw.text((x, y), line, font=font, fill=(0, 0, 0, 200))
+        y += line_height
+
+    return ContentRectangle(rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y2, content=image)
+
+
+def paste_content(page, content_box: ContentRectangle):
+    """Pastes the content of `content_box` onto `page` in place, using the content itself as the paste mask."""
+    assert page.mode == "RGB"
+    assert content_box.content.mode == "RGBA"
+
+    page.paste(content_box.content, (content_box.x1, content_box.y1), content_box.content)
+    return page
+
+
+def paste_contents(page, contents: Iterable[ContentRectangle]):
+    """Pastes all `contents` onto `page` in place and returns the page."""
+    for content in contents:
+        paste_content(page, content)
+    return page
+
+
+class PagePartitioner:
     def __init__(self):
         self.left_margin_percentage = 0.05
         self.right_margin_percentage = 0.05
@@ -343,6 +420,7 @@ class ContentBoxGenerator:
         box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin)
         boxes = lflatten(self.generate_content_boxes(box))
         boxes = self.drop_small_boxes(boxes, *page.size)
+        boxes = list(boxes)
         return boxes
 
     def draw_boxes(self, page: Image, boxes: Iterable[Rectangle]):