Refactoring: Move
Move line formatters into their own module
This commit is contained in:
parent
cc0094d3f7
commit
9480d58a8a
0
synthesis/text/__init__.py
Normal file
0
synthesis/text/__init__.py
Normal file
0
synthesis/text/line_formatter/__init__.py
Normal file
0
synthesis/text/line_formatter/__init__.py
Normal file
9
synthesis/text/line_formatter/identity.py
Normal file
9
synthesis/text/line_formatter/identity.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from synthesis.text.line_formatter.line_formatter import LineFormatter
|
||||
|
||||
|
||||
class IdentityLineFormatter(LineFormatter):
    """Line formatter that hands its input back unchanged."""

    def __init__(self):
        """Create the formatter; there is no state to set up."""

    def __call__(self, lines, last_full):
        """Return ``lines`` and ``last_full`` exactly as received.

        Args:
            lines: The lines to "format"; passed through untouched.
            last_full: Flag passed through untouched.

        Returns:
            The tuple ``(lines, last_full)``.
        """
        passthrough = (lines, last_full)
        return passthrough
|
||||
5
synthesis/text/line_formatter/line_formatter.py
Normal file
5
synthesis/text/line_formatter/line_formatter.py
Normal file
@@ -0,0 +1,5 @@
|
||||
import abc
|
||||
|
||||
|
||||
class LineFormatter(abc.ABC):
    """Common base class for line formatters.

    The class declares no members of its own; it only provides a shared
    type for concrete formatters to derive from.
    """
|
||||
41
synthesis/text/line_formatter/paragraph.py
Normal file
41
synthesis/text/line_formatter/paragraph.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from funcy import identity, compose, first, juxt, rest, rcompose
|
||||
|
||||
from cv_analysis.utils import star, rconj
|
||||
from synthesis.random import rnd
|
||||
from synthesis.text.line_formatter.line_formatter import LineFormatter
|
||||
|
||||
|
||||
class ParagraphLineFormatter(LineFormatter):
    """Line formatter that occasionally cuts the current line short.

    Each call takes the first element off ``lines``, optionally truncates it,
    and hands the remainder plus the (possibly truncated) line to ``rconj``.
    A line is only ever truncated when the previous line was a full one.
    """

    def __init__(self, blank_line_percentage=None):
        """Store the truncation threshold.

        Args:
            blank_line_percentage: Threshold that ``rnd.random()`` is compared
                against when deciding whether to truncate a line. When
                ``None``, a value is drawn with ``rnd.uniform(0, 0.5)``.
        """
        # Compare against None explicitly: ``blank_line_percentage or ...``
        # would silently replace an explicit 0 with a random value.
        if blank_line_percentage is None:
            blank_line_percentage = rnd.uniform(0, 0.5)
        self.blank_line_percentage = blank_line_percentage

    def __call__(self, lines, last_full):
        """Delegate to :meth:`format_lines`."""
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        """Format the first line of ``lines`` and rotate it behind the rest.

        Args:
            lines: Iterable of lines; its first element is the current line.
            last_full: Truthy when the previously formatted line was not
                truncated. A falsy value prevents truncation of this line.

        Returns:
            A pair ``(new_lines, current_line_is_full)``. ``new_lines`` is the
            result of ``rconj(rest(lines), formatted_first_line)`` and
            ``current_line_is_full`` is a ``bool``.
        """
        # The random draw happens first and unconditionally, so the random
        # stream advances once per call regardless of ``last_full``.
        truncate = rnd.random() < self.blank_line_percentage and last_full
        line_formatter = self.truncate_line if truncate else identity

        # Split into (remaining lines, formatted first line) ...
        split_and_format = juxt(rest, compose(line_formatter, first))
        # ... then pass both to rconj, which presumably appends the line to
        # the back of the remaining lines (helper not visible here; confirm).
        rotate = rcompose(split_and_format, star(rconj))

        return rotate(lines), not truncate

    def format_line(self, line, full=True):
        """Truncate ``line`` unless ``full`` is truthy.

        Args:
            line: The line to format.
            full: When truthy the line is returned unchanged.

        Returns:
            The pair ``(line, full)`` with ``line`` possibly truncated.
        """
        line = line if full else self.truncate_line(line)
        return line, full

    def truncate_line(self, line: str):
        """Reduce ``line`` to a short run of its trailing words.

        Draws ``n = rnd.randint(0, 4)`` and keeps up to ``n`` whitespace
        separated words that directly precede the last word. All ``"."``
        characters are removed from the kept words, and ``".\\n"`` is appended
        when anything is left.

        Args:
            line: The text line to shorten.

        Returns:
            The shortened line ending in ``".\\n"``, or ``""`` when no words
            were kept (``n == 0`` or the line has at most one word).
        """
        n_trailing_words = rnd.randint(0, 4)
        words = line.split()
        # NOTE(review): the ``: -1`` bound always drops the final word of the
        # line; kept as in the original -- confirm this is intended.
        kept = words[-n_trailing_words - 1 : -1]
        line = " ".join(kept).replace(".", "")
        return line + ".\n" if line else line
|
||||
53
test/fixtures/page_generation/page.py
vendored
53
test/fixtures/page_generation/page.py
vendored
@@ -21,7 +21,7 @@ from loguru import logger
|
||||
from matplotlib import pyplot as plt
|
||||
from matplotlib.colors import ListedColormap
|
||||
|
||||
from cv_analysis.utils import star, rconj, conj
|
||||
from cv_analysis.utils import star, conj
|
||||
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
|
||||
from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose, compute_pasting_coordinates
|
||||
from cv_analysis.utils.merging import merge_related_rectangles
|
||||
@@ -31,6 +31,8 @@ from synthesis.partitioner.page_partitioner import PagePartitioner
|
||||
from synthesis.random import rnd
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
from synthesis.segment.random_content_rectangle import RandomContentRectangle
|
||||
from synthesis.text.line_formatter.identity import IdentityLineFormatter
|
||||
from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter
|
||||
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="INFO")
|
||||
@@ -46,7 +48,6 @@ from funcy import (
|
||||
take,
|
||||
last,
|
||||
rest,
|
||||
rcompose,
|
||||
lsplit,
|
||||
lfilter,
|
||||
lzip,
|
||||
@@ -1012,54 +1013,6 @@ def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> I
|
||||
return image
|
||||
|
||||
|
||||
class LineFormatter(abc.ABC):
    """Common base class for line formatters.

    The class declares no members of its own; it only provides a shared
    type for concrete formatters to derive from.
    """
|
||||
|
||||
|
||||
class IdentityLineFormatter(LineFormatter):
    """Line formatter that hands its input back unchanged."""

    def __init__(self):
        """Create the formatter; there is no state to set up."""

    def __call__(self, lines, last_full):
        """Return ``lines`` and ``last_full`` exactly as received.

        Args:
            lines: The lines to "format"; passed through untouched.
            last_full: Flag passed through untouched.

        Returns:
            The tuple ``(lines, last_full)``.
        """
        passthrough = (lines, last_full)
        return passthrough
|
||||
|
||||
|
||||
class ParagraphLineFormatter(LineFormatter):
    """Line formatter that occasionally cuts the current line short.

    Each call takes the first element off ``lines``, optionally truncates it,
    and hands the remainder plus the (possibly truncated) line to ``rconj``.
    A line is only ever truncated when the previous line was a full one.
    """

    def __init__(self, blank_line_percentage=None):
        """Store the truncation threshold.

        Args:
            blank_line_percentage: Threshold that ``rnd.random()`` is compared
                against when deciding whether to truncate a line. When
                ``None``, a value is drawn with ``rnd.uniform(0, 0.5)``.
        """
        # Compare against None explicitly: ``blank_line_percentage or ...``
        # would silently replace an explicit 0 with a random value.
        if blank_line_percentage is None:
            blank_line_percentage = rnd.uniform(0, 0.5)
        self.blank_line_percentage = blank_line_percentage

    def __call__(self, lines, last_full):
        """Delegate to :meth:`format_lines`."""
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        """Format the first line of ``lines`` and rotate it behind the rest.

        Args:
            lines: Iterable of lines; its first element is the current line.
            last_full: Truthy when the previously formatted line was not
                truncated. A falsy value prevents truncation of this line.

        Returns:
            A pair ``(new_lines, current_line_is_full)``. ``new_lines`` is the
            result of ``rconj(rest(lines), formatted_first_line)`` and
            ``current_line_is_full`` is a ``bool``.
        """
        # The random draw happens first and unconditionally, so the random
        # stream advances once per call regardless of ``last_full``.
        truncate = rnd.random() < self.blank_line_percentage and last_full
        line_formatter = self.truncate_line if truncate else identity

        # Split into (remaining lines, formatted first line) ...
        split_and_format = juxt(rest, compose(line_formatter, first))
        # ... then pass both to rconj, which presumably appends the line to
        # the back of the remaining lines (helper not visible here; confirm).
        rotate = rcompose(split_and_format, star(rconj))

        return rotate(lines), not truncate

    def format_line(self, line, full=True):
        """Truncate ``line`` unless ``full`` is truthy.

        Args:
            line: The line to format.
            full: When truthy the line is returned unchanged.

        Returns:
            The pair ``(line, full)`` with ``line`` possibly truncated.
        """
        line = line if full else self.truncate_line(line)
        return line, full

    def truncate_line(self, line: str):
        """Reduce ``line`` to a short run of its trailing words.

        Draws ``n = rnd.randint(0, 4)`` and keeps up to ``n`` whitespace
        separated words that directly precede the last word. All ``"."``
        characters are removed from the kept words, and ``".\\n"`` is appended
        when anything is left.

        Args:
            line: The text line to shorten.

        Returns:
            The shortened line ending in ``".\\n"``, or ``""`` when no words
            were kept (``n == 0`` or the line has at most one word).
        """
        n_trailing_words = rnd.randint(0, 4)
        words = line.split()
        # NOTE(review): the ``: -1`` bound always drops the final word of the
        # line; kept as in the original -- confirm this is intended.
        kept = words[-n_trailing_words - 1 : -1]
        line = " ".join(kept).replace(".", "")
        return line + ".\n" if line else line
|
||||
|
||||
|
||||
class TextBlockGenerator(abc.ABC):
    """Common base class for text block generators.

    The class declares no members of its own; it only provides a shared
    type for concrete generators to derive from.
    """
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user