Interior tables are only an implementation detail and should not produce target boxes. Instead, only their cells are now yielded.
74 lines
2.6 KiB
Python
74 lines
2.6 KiB
Python
import itertools
|
|
from typing import List, Iterable, Union
|
|
|
|
from PIL import Image
|
|
from funcy import lsplit, lfilter
|
|
|
|
from cv_analysis.logging import logger
|
|
from cv_analysis.utils import every_nth, zipmap
|
|
from cv_analysis.utils.geometric import is_square_like
|
|
from cv_analysis.utils.merging import merge_related_rectangles
|
|
from cv_analysis.utils.postprocessing import remove_included, remove_overlapping
|
|
from cv_analysis.utils.rectangle import Rectangle
|
|
from synthesis.random import rnd
|
|
from synthesis.segment.content_rectangle import ContentRectangle
|
|
from synthesis.segment.recursive_content_rectangle import RecursiveContentRectangle
|
|
from synthesis.segment.segments import (
|
|
generate_random_text_block,
|
|
generate_recursive_random_table_with_caption,
|
|
generate_random_plot_with_caption,
|
|
)
|
|
from synthesis.segment.table.table import RecursiveRandomTable
|
|
|
|
|
|
class ContentGenerator:
    """Callable that populates layout rectangles with randomly generated content segments."""

    def __init__(self):
        # When true, related rectangles are merged before content is generated, and included / overlapping
        # results are removed afterwards.
        self.constrain_layouts = True

    def __call__(self, boxes: List[Rectangle]) -> List[ContentRectangle]:
        """Generate content for the given layout rectangles.

        The rectangles are shuffled, split into square-like ("figure") and remaining ("text") rectangles, and each
        group is distributed over two generators via `every_nth(2, ...)` on the group and on the group without its
        first element (presumably alternating between the two generators; confirm against `every_nth`):

        * text rectangles: random text blocks / recursive random tables with caption
        * figure rectangles: recursive random tables with caption / random plots with caption

        Args:
            boxes: Rectangles to fill with content. The passed list itself is left unmodified.

        Returns:
            The flat list of generated boxes as produced by `unpack_boxes`.
        """
        # Shuffle a private copy: shuffling in place would silently reorder the caller's list.
        boxes = list(boxes)
        rnd.shuffle(boxes)

        # `lsplit` returns (items passing the predicate, items failing it).
        figure_boxes, text_boxes = lsplit(is_square_like, boxes)

        if self.constrain_layouts:
            figure_boxes = merge_related_rectangles(figure_boxes)
            # Merging may produce rectangles that are no longer square-like; drop those from the figure group.
            figure_boxes = lfilter(is_square_like, figure_boxes)
            text_boxes = merge_related_rectangles(text_boxes)

        boxes = list(
            itertools.chain(
                map(generate_random_text_block, every_nth(2, text_boxes)),
                # NOTE(review): the `*zipmap(...)` results are unpacked as separate iterables for `chain`;
                # presumably each one groups a segment with its caption -- confirm against `zipmap`.
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
                *zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
            )
        )

        if self.constrain_layouts:
            boxes = remove_included(boxes)
            boxes = remove_overlapping(boxes)

        # Flatten nested content (e.g. table cells) into a single list of boxes.
        boxes = list(unpack_boxes(boxes))
        for b in boxes:
            logger.trace(f"Generated {b}")

        return boxes
|
|
|
|
|
|
def unpack_boxes(boxes: Iterable[ContentRectangle], depth=0) -> Iterable[ContentRectangle]:
    """Lazily flatten `boxes` by unpacking each one with `unpack_box` at the given nesting `depth`.

    Args:
        boxes: Boxes to unpack, in order.
        depth: Nesting level passed on to `unpack_box` for every box; 0 denotes the top level.

    Yields:
        Everything `unpack_box` yields for each box, concatenated in input order.
    """
    for box in boxes:
        yield from unpack_box(box, depth)
|
|
|
|
|
|
def unpack_box(box: Union[ContentRectangle, RecursiveContentRectangle], depth=0) -> Iterable[ContentRectangle]:
    """Yield `box` (unless it is an interior table) followed by its recursively unpacked child boxes.

    Boxes for recursive tables should be cells or the root table. An interior table, i.e. a
    `RecursiveRandomTable` encountered at a non-zero `depth`, is therefore not yielded itself; only its
    child boxes are.

    Args:
        box: The box to unpack.
        depth: Nesting level of `box`; 0 denotes the top level.

    Yields:
        The box itself where applicable, then everything `unpack_boxes` yields for its child boxes at
        `depth + 1`.
    """
    interior_table = isinstance(box, RecursiveRandomTable) and depth
    if not interior_table:
        yield box

    if box.has_child_boxes():
        yield from unpack_boxes(box.child_boxes, depth + 1)
|