From 42d285e35b82ba0f36835eff6ff70c50bd80d20c Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Wed, 1 Feb 2023 18:33:44 +0100 Subject: [PATCH] Refactoring: Move content generator into its own module --- synthesis/content_generator.py | 47 +++++++++++++++++++++++++++ synthesis/segment/segments.py | 1 + test/fixtures/page_generation/page.py | 46 +------------------------- 3 files changed, 49 insertions(+), 45 deletions(-) create mode 100644 synthesis/content_generator.py diff --git a/synthesis/content_generator.py b/synthesis/content_generator.py new file mode 100644 index 0000000..84d6d65 --- /dev/null +++ b/synthesis/content_generator.py @@ -0,0 +1,47 @@ +import itertools +from typing import List + +from PIL import Image +from funcy import lsplit, lfilter + +from cv_analysis.utils import every_nth, zipmap +from cv_analysis.utils.geometric import is_square_like +from cv_analysis.utils.merging import merge_related_rectangles +from cv_analysis.utils.postprocessing import remove_included, remove_overlapping +from cv_analysis.utils.rectangle import Rectangle +from synthesis.random import rnd +from synthesis.segment.segments import ( + generate_random_text_block, + generate_recursive_random_table_with_caption, + generate_random_plot_with_caption, +) + + +class ContentGenerator: + def __init__(self): + self.constrain_layouts = True + + def __call__(self, boxes: List[Rectangle]) -> Image: + rnd.shuffle(boxes) + + figure_boxes, text_boxes = lsplit(is_square_like, boxes) + + if self.constrain_layouts: + figure_boxes = merge_related_rectangles(figure_boxes) + figure_boxes = lfilter(is_square_like, figure_boxes) + text_boxes = merge_related_rectangles(text_boxes) + + boxes = list( + itertools.chain( + map(generate_random_text_block, every_nth(2, text_boxes)), + *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])), + *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)), + 
*zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])), + ) + ) + + if self.constrain_layouts: + boxes = remove_included(boxes) + boxes = remove_overlapping(boxes) + + return boxes diff --git a/synthesis/segment/segments.py b/synthesis/segment/segments.py index adfb078..910a27b 100644 --- a/synthesis/segment/segments.py +++ b/synthesis/segment/segments.py @@ -39,6 +39,7 @@ def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle: def generate_random_plot_with_caption(rectangle: Rectangle): + # TODO: deduplicate with generate_random_table_with_caption plot_box, caption_box = split_into_figure_and_caption(rectangle) plot_box = generate_random_plot(plot_box) caption_box = generate_random_image_caption(caption_box) diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index c106172..b2595ec 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -1,4 +1,3 @@ -import itertools import sys from typing import Tuple, Iterable, List @@ -9,19 +8,11 @@ from PIL import Image, ImageEnhance from PIL.Image import Transpose from loguru import logger -from cv_analysis.utils import zipmap, every_nth from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil -from cv_analysis.utils.geometric import is_square_like from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose -from cv_analysis.utils.merging import merge_related_rectangles -from cv_analysis.utils.postprocessing import remove_overlapping, remove_included +from synthesis.content_generator import ContentGenerator from synthesis.partitioner.two_column import TwoColumnPagePartitioner from synthesis.random import rnd -from synthesis.segment.segments import ( - generate_random_plot_with_caption, - generate_recursive_random_table_with_caption, - generate_random_text_block, -) from synthesis.segment.table.table import paste_contents logger.remove() @@ 
-32,8 +23,6 @@ from funcy import ( juxt, compose, identity, - lsplit, - lfilter, ) from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR @@ -186,39 +175,6 @@ def size(dpi, orientation): return size -class ContentGenerator: - def __init__(self): - self.constrain_layouts = True - - def __call__(self, boxes: List[Rectangle]) -> Image: - rnd.shuffle(boxes) - - figure_boxes, text_boxes = lsplit(is_square_like, boxes) - - if self.constrain_layouts: - figure_boxes = merge_related_rectangles(figure_boxes) - figure_boxes = lfilter(is_square_like, figure_boxes) - text_boxes = merge_related_rectangles(text_boxes) - - boxes = list( - itertools.chain( - map(generate_random_text_block, every_nth(2, text_boxes)), - *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])), - *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)), - *zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])), - ) - ) - - if self.constrain_layouts: - boxes = remove_included(boxes) - boxes = remove_overlapping(boxes) - - return boxes - - -# TODO: deduplicate with generate_random_table_with_caption - - @pytest.fixture( params=[ TwoColumnPagePartitioner,