cv-analysis-service/synthesis/content_generator.py
Matthias Bisping 43688d0f0b Add filtering of interior tables in cell yielding
Interior tables are only an implementation detail and should not produce
target boxes. Instead, only their cells are now yielded.
2023-02-14 16:19:10 +01:00

74 lines
2.6 KiB
Python

import itertools
from typing import List, Iterable, Union
from PIL import Image
from funcy import lsplit, lfilter
from cv_analysis.logging import logger
from cv_analysis.utils import every_nth, zipmap
from cv_analysis.utils.geometric import is_square_like
from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.postprocessing import remove_included, remove_overlapping
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.recursive_content_rectangle import RecursiveContentRectangle
from synthesis.segment.segments import (
generate_random_text_block,
generate_recursive_random_table_with_caption,
generate_random_plot_with_caption,
)
from synthesis.segment.table.table import RecursiveRandomTable
class ContentGenerator:
    """Populate layout boxes with randomly generated content segments.

    Calling an instance splits the given boxes by ``is_square_like``, hands
    them to the text-block, table and plot generators and returns the
    generated content rectangles flattened through ``unpack_boxes``.
    """

    def __init__(self):
        # When truthy, boxes are merged before content generation and the
        # generated boxes are de-duplicated (included / overlapping ones
        # removed) afterwards.
        self.constrain_layouts = True

    def __call__(self, boxes: List[Rectangle]) -> List[ContentRectangle]:
        """Generate content for ``boxes`` and return the flattened content boxes.

        Args:
            boxes: Layout rectangles to fill. The argument itself is left
                untouched; a shuffled copy is processed.

        Returns:
            The generated boxes after ``unpack_boxes`` has flattened them.
        """
        # Shuffle a private copy so the caller's list keeps its order.
        boxes = list(boxes)
        rnd.shuffle(boxes)

        # Boxes satisfying ``is_square_like`` are used for figures (tables and
        # plots), all remaining boxes for text blocks and tables.
        figure_boxes, text_boxes = lsplit(is_square_like, boxes)

        if self.constrain_layouts:
            figure_boxes = merge_related_rectangles(figure_boxes)
            # Merging may produce rectangles that are no longer square-like.
            figure_boxes = lfilter(is_square_like, figure_boxes)
            text_boxes = merge_related_rectangles(text_boxes)

        # NOTE(review): ``every_nth(2, xs)`` together with the ``xs[1:]`` offset
        # presumably alternates the two generators over each box list, and
        # ``zipmap`` presumably yields one iterable per box (hence the ``*``)
        # -- confirm against cv_analysis.utils.
        boxes = list(
            itertools.chain(
                map(generate_random_text_block, every_nth(2, text_boxes)),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
                *zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
            )
        )

        if self.constrain_layouts:
            boxes = remove_included(boxes)
            boxes = remove_overlapping(boxes)

        # Flatten nested boxes (e.g. the cells of recursive tables).
        boxes = list(unpack_boxes(boxes))

        for b in boxes:
            logger.trace(f"Generated {b}")

        return boxes
def unpack_boxes(boxes: Iterable[ContentRectangle], depth=0) -> Iterable[ContentRectangle]:
    """Lazily flatten ``boxes`` by unpacking each one with ``unpack_box``.

    Args:
        boxes: Boxes to flatten.
        depth: Nesting level passed unchanged to ``unpack_box`` for every box.

    Yields:
        Everything ``unpack_box`` yields for each box, in input order.
    """
    for current in boxes:
        yield from unpack_box(current, depth)
def unpack_box(box: Union[ContentRectangle, RecursiveContentRectangle], depth=0) -> Iterable[ContentRectangle]:
    """Yield ``box`` (unless it is an interior table) followed by its unpacked children.

    Args:
        box: The box to unpack.
        depth: Nesting level of ``box``; a falsy value marks the root level.

    Yields:
        ``box`` itself, except when it is a ``RecursiveRandomTable`` at a
        non-root depth, then the result of unpacking ``box.child_boxes`` one
        level deeper if ``box.has_child_boxes()`` is true.
    """
    # Only cells and the root table should produce boxes. A table nested inside
    # another table is an implementation detail: it is skipped here, while its
    # cells are still reached through the recursion below.
    is_interior_table = isinstance(box, RecursiveRandomTable) and bool(depth)
    if not is_interior_table:
        yield box

    if box.has_child_boxes():
        yield from unpack_boxes(box.child_boxes, depth + 1)