[WIP] random text segments

This commit is contained in:
Matthias Bisping 2023-01-16 16:34:18 +01:00
parent 1012988475
commit 6fead2d9b9
3 changed files with 84 additions and 7 deletions

17
poetry.lock generated
View File

@ -776,6 +776,17 @@ python-versions = ">=3.7"
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "faker"
version = "16.4.0"
description = "Faker is a Python package that generates fake data for you."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
python-dateutil = ">=2.4"
[[package]]
name = "filelock"
version = "3.9.0"
@ -2195,7 +2206,7 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
[metadata]
lock-version = "1.1"
python-versions = "~3.8"
content-hash = "87e394dd1a2a230f27fec0d1e634dc4c398fbebe5ae1c3baceee9c93d190945b"
content-hash = "4d220326bd29d9f6b306adc71fe66ec201ca74fc7d277b052195953f8a063037"
[metadata.files]
aiohttp = [
@ -2767,6 +2778,10 @@ exceptiongroup = [
{file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"},
{file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"},
]
faker = [
{file = "Faker-16.4.0-py3-none-any.whl", hash = "sha256:5420467fad3fa582094057754e5e81326cb1f51ab822bf9df96c077cfb35ae49"},
{file = "Faker-16.4.0.tar.gz", hash = "sha256:dcffdca8ec9a715982bcd5f53ee688dc4784cd112f9910f8f7183773eb3ec276"},
]
filelock = [
{file = "filelock-3.9.0-py3-none-any.whl", hash = "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d"},
{file = "filelock-3.9.0.tar.gz", hash = "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de"},

View File

@ -39,6 +39,7 @@ pytest = "^7.0.1"
[tool.poetry.group.test.dependencies]
albumentations = "^1.3.0"
faker = "^16.4.0"
[build-system]
requires = ["poetry-core"]

View File

@ -1,12 +1,15 @@
import random
import string
import textwrap
from typing import Tuple, Union, Iterable, List
import albumentations as A
import cv2 as cv
import numpy as np
import pytest
from PIL import Image, ImageOps
from PIL import Image, ImageOps, ImageFont, ImageDraw
from PIL.Image import Transpose
from faker import Faker
Image_t = Union[Image.Image, np.ndarray]
#
@ -65,7 +68,7 @@ Image_t = Union[Image.Image, np.ndarray]
# ],
# p=0.5,
# )
from funcy import juxt, compose, identity, lflatten
from funcy import juxt, compose, identity, lflatten, lmap
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
@ -316,13 +319,70 @@ def blank_page(texture, texture_fn) -> np.ndarray:
"""Creates a blank page with a given orientation and dpi."""
page = random_flip(texture)
page = texture_fn(page)
content_box_generator = ContentBoxGenerator()
boxes = content_box_generator(page)
content_box_generator.draw_boxes(page, boxes)
page_partitioner = PagePartitioner()
boxes = page_partitioner(page)
content_generator = ContentGenerator()
boxes = content_generator(boxes)
page = paste_contents(page, boxes)
page_partitioner.draw_boxes(page, boxes)
page = np.array(page)
return page
class ContentBoxGenerator:
class ContentRectangle(Rectangle):
    """A ``Rectangle`` that additionally carries a content payload.

    Args:
        x1: Forwarded unchanged to ``Rectangle.__init__``.
        y1: Forwarded unchanged to ``Rectangle.__init__``.
        x2: Forwarded unchanged to ``Rectangle.__init__``.
        y2: Forwarded unchanged to ``Rectangle.__init__``.
        content: Payload attached to the rectangle; defaults to ``None``.
            In this module ``generate_random_text_block`` stores an RGBA
            PIL image here.
    """

    def __init__(self, x1, y1, x2, y2, content=None):
        # Geometry is handled entirely by the base class.
        super().__init__(x1, y1, x2, y2)
        # The only state this subclass adds on top of the coordinates.
        self.content = content
class ContentGenerator:
    """Callable that produces content for every box of a partitioned page.

    The generator is stateless, so no explicit ``__init__`` is needed;
    ``ContentGenerator()`` keeps working through the default constructor.
    """

    def __call__(self, boxes: Iterable[Rectangle]) -> List[ContentRectangle]:
        """Generate a random text block for each rectangle in ``boxes``.

        Args:
            boxes: Rectangles to fill with content.

        Returns:
            One ``ContentRectangle`` per input rectangle, in input order
            (``lmap`` materialises the result as a list).
        """
        return lmap(generate_random_text_block, boxes)
def generate_random_text_block(rectangle: Rectangle) -> ContentRectangle:
    """Render random filler text into a transparent image sized like ``rectangle``.

    Args:
        rectangle: Area to fill. Its ``width`` and ``height`` determine the
            image size; its corner coordinates are copied to the result.

    Returns:
        A ``ContentRectangle`` with the same coordinates as ``rectangle`` whose
        ``content`` is the rendered RGBA image.
    """
    font = ImageFont.load_default()
    # Alpha 0 makes the canvas fully transparent; only the drawn text is opaque.
    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
    draw = ImageDraw.Draw(image)

    text = Faker().paragraph(nb_sentences=300, variable_nb_sentences=True, ext_word_list=None)

    # textwrap measures ``width`` in characters, whereas the canvas is measured
    # in pixels. Convert using the pixel width of a wide glyph so that wrapped
    # lines fit inside the image instead of being clipped at its right edge.
    # NOTE(review): ImageDraw.textsize is deprecated since Pillow 9.2 and removed
    # in Pillow 10; switch to textbbox/textlength when Pillow is upgraded.
    glyph_width = draw.textsize("M", font=font)[0]
    # Clamp to 1: textwrap.wrap rejects a non-positive width.
    chars_per_line = max(1, int(image.width // max(1, glyph_width)))
    wrapped_text = textwrap.wrap(text, width=chars_per_line, break_long_words=False)

    # Start drawing in the top-left corner and move down line by line.
    x, y = 0, 0
    for line in wrapped_text:
        # Every remaining line would start below the canvas, so drawing it
        # cannot change the image.
        if y >= image.height:
            break
        draw.text((x, y), line, font=font, fill=(0, 0, 0, 200))
        # Advance the cursor by the rendered height of the line just drawn.
        y += draw.textsize(line, font=font)[1]

    return ContentRectangle(rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y2, content=image)
def paste_content(page, content_box: ContentRectangle):
    """Paste the content image of ``content_box`` onto ``page`` in place.

    Args:
        page: Target image; must be in ``"RGB"`` mode.
        content_box: Box whose ``content`` is an ``"RGBA"`` image and whose
            ``x1``/``y1`` give the paste position.

    Returns:
        The same ``page`` object that was passed in.
    """
    assert page.mode == "RGB"
    overlay = content_box.content
    assert overlay.mode == "RGBA"

    anchor = (content_box.x1, content_box.y1)
    # The overlay is also passed as the mask argument of ``paste``.
    page.paste(overlay, anchor, overlay)
    return page
def paste_contents(page, contents: Iterable[ContentRectangle]):
    """Paste every content box in ``contents`` onto ``page``, in iteration order.

    Args:
        page: Target image, passed to ``paste_content`` for each box.
        contents: Content boxes to paste.

    Returns:
        The ``page`` after all boxes have been pasted.
    """
    for content_box in contents:
        # paste_content hands back the page it was given, so rebinding
        # keeps pointing at the same image.
        page = paste_content(page, content_box)
    return page
class PagePartitioner:
def __init__(self):
self.left_margin_percentage = 0.05
self.right_margin_percentage = 0.05
@ -343,6 +403,7 @@ class ContentBoxGenerator:
box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin)
boxes = lflatten(self.generate_content_boxes(box))
boxes = self.drop_small_boxes(boxes, *page.size)
boxes = list(boxes)
return boxes
def draw_boxes(self, page: Image, boxes: Iterable[Rectangle]):