Refactoring & Add table captions
This commit is contained in:
parent
9e77e25afb
commit
7d8842b4ac
@ -53,6 +53,14 @@ def until(cond, func, *args, **kwargs):
|
||||
return first(filter(cond, iterate(func, *args, **kwargs)))
|
||||
|
||||
|
||||
def conj(x, xs):
    """Return a new list with *x* placed in front of the items of *xs*.

    *xs* may be any iterable; it is consumed once and never mutated.
    """
    combined = [x]
    combined.extend(xs)
    return combined
|
||||
|
||||
|
||||
def rconj(xs, x):
    """Return a new list holding the items of *xs* followed by *x*.

    *xs* may be any iterable; it is consumed once and never mutated.
    """
    combined = list(xs)
    combined.append(x)
    return combined
|
||||
|
||||
|
||||
def make_merger_sentinel():
|
||||
def no_new_mergers(records):
|
||||
nonlocal number_of_records_so_far
|
||||
@ -69,7 +77,3 @@ def make_merger_sentinel():
|
||||
number_of_records_so_far = -1
|
||||
|
||||
return no_new_mergers
|
||||
|
||||
|
||||
def rconj(xs, x):
    """Append *x* to a fresh list copy of the iterable *xs* and return that list."""
    return list(xs) + [x]
|
||||
|
||||
108
test/fixtures/page_generation/page.py
vendored
108
test/fixtures/page_generation/page.py
vendored
@ -23,7 +23,7 @@ from matplotlib import pyplot as plt
|
||||
from tabulate import tabulate
|
||||
|
||||
from cv_analysis.table_parsing import isolate_vertical_and_horizontal_components
|
||||
from cv_analysis.utils import star, rconj
|
||||
from cv_analysis.utils import star, rconj, conj
|
||||
from cv_analysis.utils.common import normalize_to_gray_scale
|
||||
from cv_analysis.utils.merging import merge_related_rectangles
|
||||
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
||||
@ -409,23 +409,27 @@ class ContentGenerator:
|
||||
text_boxes = merge_related_rectangles(text_boxes)
|
||||
|
||||
text_boxes = lmap(generate_random_text_block, every_nth(2, text_boxes))
|
||||
tables_1 = lmap(generate_recursive_random_table, every_nth(2, text_boxes[1:]))
|
||||
tables_1, tables_1_captions = zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:]))
|
||||
|
||||
# TODO: Refactor: Figures should be their own class
|
||||
try:
|
||||
plots, captions = map(list, zip(*map(generate_random_figure, every_nth(2, figure_boxes))))
|
||||
except ValueError:
|
||||
plots, captions = [], []
|
||||
plots, plot_captions = zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes))
|
||||
|
||||
tables_2 = lmap(generate_recursive_random_table, every_nth(2, figure_boxes[1:]))
|
||||
tables_2, tables_2_captions = zipmap(
|
||||
generate_recursive_random_table_with_caption, every_nth(2, figure_boxes[1:])
|
||||
)
|
||||
|
||||
boxes = text_boxes + plots + captions + tables_1 + tables_2
|
||||
boxes = text_boxes + plots + plot_captions + tables_1 + tables_1_captions + tables_2 + tables_2_captions
|
||||
boxes = remove_included(boxes)
|
||||
boxes = remove_overlapping(boxes)
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
def zipmap(fn, boxes, n=2):
    """Map *fn* over *boxes* and yield the results transposed into lists.

    *fn* is expected to return a tuple per box; the i-th yielded list collects
    the i-th element of every such tuple. When the transposition produces
    fewer than *n* lists (for instance because *boxes* is empty), *n* fresh
    empty lists are yielded instead so that callers can always unpack *n*
    values.
    """
    columns = [list(column) for column in zip(*map(fn, boxes))]

    if len(columns) < n:
        # Nothing (or too little) to transpose: hand out n independent empty lists.
        for _ in range(n):
            yield []
    else:
        yield from columns
|
||||
|
||||
|
||||
def is_square_like(box: Rectangle):
    """Return True when neither side of *box* is more than twice the other.

    The check is ``width / height > 0.5 and height / width > 0.5`` expressed
    with multiplications, so a box with a zero width or height is reported as
    not square-like instead of raising ``ZeroDivisionError``.

    NOTE(review): assumes ``box.width`` and ``box.height`` are non-negative.
    """
    width, height = box.width, box.height
    return width > 0.5 * height and height > 0.5 * width
|
||||
|
||||
@ -434,18 +438,29 @@ def every_nth(n, iterable):
|
||||
return itertools.islice(iterable, 0, None, n)
|
||||
|
||||
|
||||
def generate_random_figure(rectangle: Rectangle):
    """Fill *rectangle* with a random plot and a random text block below it.

    The rectangle is divided by ``split_into_figure_and_caption``; the first
    part receives a plot, the second a generic text block. Returns the pair
    ``(figure_box, caption_box)``.
    """
    # assert rectangle.height / rectangle.width < 0.7, "Figure is too wide to add a caption."
    plot_area, caption_area = split_into_figure_and_caption(rectangle)
    plot = generate_random_plot(plot_area)
    caption = generate_random_text_block(caption_area)
    return plot, caption
|
||||
def generate_random_plot_with_caption(rectangle: Rectangle):
    """Fill *rectangle* with a random plot plus an image caption.

    The rectangle is divided by ``split_into_figure_and_caption``; the first
    part is turned into a plot and the second into an image caption.
    Returns the pair ``(plot_box, caption_box)``.
    """
    plot_area, caption_area = split_into_figure_and_caption(rectangle)
    plot = generate_random_plot(plot_area)
    caption = generate_random_image_caption(caption_area)
    return plot, caption
|
||||
|
||||
|
||||
def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    """Fill *rectangle* with a random (recursive) table plus a table caption.

    The rectangle is divided by ``split_into_figure_and_caption``; the first
    part is turned into a table and the second into a table caption.
    Returns the pair ``(table_box, caption_box)``.
    """
    table_area, caption_area = split_into_figure_and_caption(rectangle)
    table = generate_recursive_random_table(table_area)
    caption = generate_random_table_caption(caption_area)
    return table, caption
|
||||
|
||||
|
||||
def split_into_figure_and_caption(rectangle: Rectangle):
    """Split *rectangle* along its height into a figure box and a caption box.

    A split point is drawn uniformly from 50%-90% of the rectangle's height
    and a gap of up to 3% of the height is left open, centred on that split
    point. Both boxes keep the full horizontal extent of *rectangle*.

    Returns:
        ``(figure_box, caption_box)``: the figure box spans from
        ``rectangle.y1`` to just before the gap, the caption box from just
        after the gap to ``rectangle.y2``.
    """
    # Keep the draw order (gap first, split second) so seeded runs stay reproducible.
    gap_percentage = random.uniform(0, 0.03)
    split_point = random.uniform(0.5, 0.9)

    # Half of the gap is taken from each side of the split point.
    figure_y2 = rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
    caption_y1 = rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2)

    figure_box = Rectangle(rectangle.x1, rectangle.y1, rectangle.x2, figure_y2)
    caption_box = Rectangle(rectangle.x1, caption_y1, rectangle.x2, rectangle.y2)
    return figure_box, caption_box
|
||||
|
||||
|
||||
@ -717,12 +732,13 @@ def dump_plt_to_image(rectangle):
|
||||
|
||||
|
||||
class RandomFontPicker:
|
||||
def __init__(self, font_dir=None):
|
||||
def __init__(self, font_dir=None, size=None):
|
||||
self.fonts = get_fonts(font_dir)
|
||||
self.fonts_lower = [font.lower() for font in self.fonts]
|
||||
|
||||
self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
|
||||
self.draw = ImageDraw.Draw(self.test_image)
|
||||
self.size = size or 11
|
||||
|
||||
def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont: # FIXME: Slow!
|
||||
|
||||
@ -772,19 +788,19 @@ def get_fonts(path: Path = None) -> List[str]:
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def get_font_picker(font_dir=None, **kwargs):
    """Return a ``RandomFontPicker``, cached per distinct set of arguments.

    Args:
        font_dir: Forwarded to ``RandomFontPicker`` as ``font_dir``; may be
            given positionally or by keyword.
        **kwargs: Any further keyword arguments (e.g. ``size``) are forwarded
            to ``RandomFontPicker`` unchanged.

    Because of ``lru_cache`` all arguments must be hashable, and repeated
    calls with the same arguments return the same picker instance.
    """
    return RandomFontPicker(font_dir=font_dir, **kwargs)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def pick_random_mono_space_font_available_on_system(**kwargs):
    """Pick a mono-space font via the shared font picker.

    ``**kwargs`` are forwarded to ``get_font_picker``. The result is cached by
    ``lru_cache``, so the pick is made once per distinct set of keyword
    arguments and the same font is returned on later calls.
    """
    font_picker = get_font_picker(**kwargs)
    return font_picker.pick_random_mono_space_font_available_on_system()
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def pick_random_font_available_on_system(**kwargs):
    """Pick a font via the shared font picker.

    ``**kwargs`` are forwarded to ``get_font_picker``. The result is cached by
    ``lru_cache``, so the pick is made once per distinct set of keyword
    arguments and the same font is returned on later calls.
    """
    font_picker = get_font_picker(**kwargs)
    # NOTE(review): this general-purpose picker filters with includes="mono" -
    # confirm that restricting to "mono" fonts is intended here.
    return font_picker.pick_random_font_available_on_system(includes="mono")
|
||||
|
||||
|
||||
@ -900,6 +916,21 @@ def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> Conten
|
||||
return block
|
||||
|
||||
|
||||
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    """Create a caption block for *rectangle* that starts with "Fig <n>".

    ``<n>`` is a random integer between 1 and 20 (inclusive).
    """
    figure_number = random.randint(1, 20)
    caption_start = f"Fig {figure_number}"
    return generate_random_caption(rectangle, caption_start)
|
||||
|
||||
|
||||
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    """Create a caption block for *rectangle* that starts with "Tabl <n>".

    ``<n>`` is a random integer between 1 and 20 (inclusive).
    """
    table_number = random.randint(1, 20)
    # NOTE(review): the prefix is spelled "Tabl" - confirm whether "Table" or "Tab" was meant.
    caption_start = f"Tabl {table_number}"
    return generate_random_caption(rectangle, caption_start)
|
||||
|
||||
|
||||
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    """Build a ``TextBlock`` over *rectangle* filled with caption text.

    Args:
        rectangle: Area to fill; its ``coords`` define the block. If it is a
            ``ContentRectangle`` its existing ``content`` is carried over.
        caption_start: Prefix handed to ``CaptionGenerator``.
        n_sentences: Passed through to ``TextBlock.generate_random_text``.

    Returns:
        The populated text block (font size 5).
    """
    coords = rectangle.coords
    caption_generator = CaptionGenerator(caption_start=caption_start)
    block = TextBlock(*coords, text_generator=caption_generator, font_size=5)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        block.content = rectangle.content
    else:
        block.content = None

    block.generate_random_text(rectangle, n_sentences)
    return block
|
||||
|
||||
|
||||
def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
|
||||
block = TextBlock(*rectangle.coords)
|
||||
block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor
|
||||
@ -927,6 +958,14 @@ class LineFormatter(abc.ABC):
|
||||
pass
|
||||
|
||||
|
||||
class IdentityLineFormatter(LineFormatter):
    """Line formatter that hands its input back unchanged."""

    def __init__(self):
        """Set up nothing; the formatter keeps no state."""

    def __call__(self, lines, last_full):
        """Return ``(lines, last_full)`` exactly as received."""
        unchanged = (lines, last_full)
        return unchanged
|
||||
|
||||
|
||||
class ParagraphLineFormatter(LineFormatter):
|
||||
def __init__(self, blank_line_percentage=None):
|
||||
self.blank_line_percentage = blank_line_percentage or random.uniform(0, 0.5)
|
||||
@ -979,17 +1018,32 @@ class ParagraphGenerator(TextBlockGenerator):
|
||||
return lines
|
||||
|
||||
|
||||
class CaptionGenerator(TextBlockGenerator):
    """Text generator that produces caption lines with a leading label."""

    def __init__(self, caption_start=None):
        """Store the caption label.

        When *caption_start* is missing or falsy, a label of the form
        "Fig <n>" with a random ``<n>`` in 1..20 is used instead.
        """
        self.line_formatter = IdentityLineFormatter()
        if not caption_start:
            caption_start = f"Fig {random.randint(1, 20)}"
        self.caption_start = caption_start

    def __call__(self, rectangle, n_sentences):
        """Delegate to :meth:`generate_paragraph`."""
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        """Generate text lines for *rectangle* and prefix the first one.

        The first line becomes "<caption_start>.: <first line>"; the remaining
        lines follow unchanged. Returns a list of lines.

        NOTE(review): if no lines are generated, the prefix is combined with
        whatever ``first`` returns for an empty input - confirm this case.
        """
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        labelled_head = f"{self.caption_start}.: {first(lines)}"
        return [labelled_head, *rest(lines)]
|
||||
|
||||
|
||||
class TextBlock(ContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, line_generator=None, font=None):
|
||||
def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
self.font = ImageFont.load_default() if not font else pick_random_font_available_on_system()
|
||||
self.line_generator = line_generator or ParagraphGenerator()
|
||||
self.font = pick_random_font_available_on_system(size=font_size) # ImageFont.load_default()
|
||||
self.text_generator = text_generator or ParagraphGenerator()
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
|
||||
lines = self.line_generator(rectangle, n_sentences)
|
||||
lines = self.text_generator(rectangle, n_sentences)
|
||||
image = write_lines_to_image(lines, rectangle, self.font)
|
||||
return self.__put_content(image)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user