Refactoring: Move

Move line formatters into their own module
This commit is contained in:
Matthias Bisping 2023-02-01 16:59:31 +01:00
parent cc0094d3f7
commit 9480d58a8a
6 changed files with 58 additions and 50 deletions

View File

View File

@ -0,0 +1,9 @@
from synthesis.text.line_formatter.line_formatter import LineFormatter
class IdentityLineFormatter(LineFormatter):
    """Line formatter that hands its input back without modifying it."""

    def __init__(self):
        """Create the formatter; there is nothing to configure."""

    def __call__(self, lines, last_full):
        """Return ``lines`` and ``last_full`` unchanged, as a pair."""
        unchanged = (lines, last_full)
        return unchanged

View File

@ -0,0 +1,5 @@
import abc
class LineFormatter(abc.ABC):
    """Common base class for line formatters.

    Declares no members of its own; it only gives concrete formatters a shared type.
    """

View File

@ -0,0 +1,41 @@
from funcy import identity, compose, first, juxt, rest, rcompose
from cv_analysis.utils import star, rconj
from synthesis.random import rnd
from synthesis.text.line_formatter.line_formatter import LineFormatter
class ParagraphLineFormatter(LineFormatter):
    """Line formatter that randomly cuts lines short.

    On each call the first element of ``lines`` is either kept as is or replaced
    by a truncated version, then passed to ``rconj`` together with the remaining
    lines. A line is only truncated when the previous one was full.
    """

    def __init__(self, blank_line_percentage=None):
        """Store the truncation threshold.

        Args:
            blank_line_percentage: Value that ``rnd.random()`` is compared against
                to decide whether a line gets truncated. When ``None``, a value
                is drawn with ``rnd.uniform(0, 0.5)``.
        """
        # Test for None explicitly: with `or`, an explicit 0 (never truncate) was
        # falsy and got silently replaced by a random threshold.
        if blank_line_percentage is None:
            blank_line_percentage = rnd.uniform(0, 0.5)
        self.blank_line_percentage = blank_line_percentage

    def __call__(self, lines, last_full):
        """Delegate to :meth:`format_lines`."""
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        """Format the first line of ``lines`` and combine it with the rest via ``rconj``.

        Args:
            lines: Collection of lines; its first element is the current line.
            last_full: Truthy if the previously handled line was a full line.

        Returns:
            A pair ``(new_lines, current_line_is_full)`` where ``new_lines`` is the
            result of ``rconj`` applied to the remaining lines and the formatted
            current line.
        """

        def truncate_current_line():
            # rnd.random() is evaluated first, so one random number is drawn on
            # every call, even when last_full is falsy.
            return rnd.random() < self.blank_line_percentage and last_full

        # This is meant to be read from the bottom up.
        current_line_shall_not_be_a_full_line = truncate_current_line()
        line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
        format_current_line = compose(line_formatter, first)
        # NOTE(review): presumably rconj(coll, item) appends item to the end of coll - confirm in cv_analysis.utils.
        move_current_line_to_back = star(rconj)
        split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
        split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
            split_first_line_from_lines_and_format_the_former,
            move_current_line_to_back,
        )
        current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
        # Start reading here and move up.
        return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line

    def format_line(self, line, full=True):
        """Return ``(line, full)``, truncating ``line`` first when ``full`` is falsy."""
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        """Build a short replacement line from words near the end of ``line``.

        Takes up to ``n_trailing_words`` whitespace-separated words that directly
        precede the final word (the final word itself is excluded by the slice),
        removes all periods and, if anything is left, appends ``".\\n"``.
        Returns an empty string when no words were selected.
        """
        n_trailing_words = rnd.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line

View File

@ -21,7 +21,7 @@ from loguru import logger
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
from cv_analysis.utils import star, rconj, conj
from cv_analysis.utils import star, conj
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose, compute_pasting_coordinates
from cv_analysis.utils.merging import merge_related_rectangles
@ -31,6 +31,8 @@ from synthesis.partitioner.page_partitioner import PagePartitioner
from synthesis.random import rnd
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.random_content_rectangle import RandomContentRectangle
from synthesis.text.line_formatter.identity import IdentityLineFormatter
from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter
logger.remove()
logger.add(sys.stderr, level="INFO")
@ -46,7 +48,6 @@ from funcy import (
take,
last,
rest,
rcompose,
lsplit,
lfilter,
lzip,
@ -1012,54 +1013,6 @@ def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> I
return image
class LineFormatter(abc.ABC):
    """Common base class for line formatters.

    Declares no members of its own; it only gives concrete formatters a shared type.
    """
class IdentityLineFormatter(LineFormatter):
    """Line formatter that hands its input back without modifying it."""

    def __init__(self):
        """Create the formatter; there is nothing to configure."""

    def __call__(self, lines, last_full):
        """Return ``lines`` and ``last_full`` unchanged, as a pair."""
        unchanged = (lines, last_full)
        return unchanged
class ParagraphLineFormatter(LineFormatter):
    """Line formatter that randomly cuts lines short.

    On each call the first element of ``lines`` is either kept as is or replaced
    by a truncated version, then passed to ``rconj`` together with the remaining
    lines. A line is only truncated when the previous one was full.
    """

    def __init__(self, blank_line_percentage=None):
        """Store the truncation threshold.

        Args:
            blank_line_percentage: Value that ``rnd.random()`` is compared against
                to decide whether a line gets truncated. When ``None``, a value
                is drawn with ``rnd.uniform(0, 0.5)``.
        """
        # Test for None explicitly: with `or`, an explicit 0 (never truncate) was
        # falsy and got silently replaced by a random threshold.
        if blank_line_percentage is None:
            blank_line_percentage = rnd.uniform(0, 0.5)
        self.blank_line_percentage = blank_line_percentage

    def __call__(self, lines, last_full):
        """Delegate to :meth:`format_lines`."""
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        """Format the first line of ``lines`` and combine it with the rest via ``rconj``.

        Args:
            lines: Collection of lines; its first element is the current line.
            last_full: Truthy if the previously handled line was a full line.

        Returns:
            A pair ``(new_lines, current_line_is_full)`` where ``new_lines`` is the
            result of ``rconj`` applied to the remaining lines and the formatted
            current line.
        """

        def truncate_current_line():
            # rnd.random() is evaluated first, so one random number is drawn on
            # every call, even when last_full is falsy.
            return rnd.random() < self.blank_line_percentage and last_full

        # This is meant to be read from the bottom up.
        current_line_shall_not_be_a_full_line = truncate_current_line()
        line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
        format_current_line = compose(line_formatter, first)
        # NOTE(review): presumably rconj(coll, item) appends item to the end of coll - confirm in cv_analysis.utils.
        move_current_line_to_back = star(rconj)
        split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
        split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
            split_first_line_from_lines_and_format_the_former,
            move_current_line_to_back,
        )
        current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
        # Start reading here and move up.
        return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line

    def format_line(self, line, full=True):
        """Return ``(line, full)``, truncating ``line`` first when ``full`` is falsy."""
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        """Build a short replacement line from words near the end of ``line``.

        Takes up to ``n_trailing_words`` whitespace-separated words that directly
        precede the final word (the final word itself is excluded by the slice),
        removes all periods and, if anything is left, appends ``".\\n"``.
        Returns an empty string when no words were selected.
        """
        n_trailing_words = rnd.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line
class TextBlockGenerator(abc.ABC):
    """Base class for text block generators; declares no members of its own."""