Remove unused code
This commit is contained in:
parent
4c65d906b8
commit
e8b4467265
361
test/fixtures/page_generation/page.py
vendored
361
test/fixtures/page_generation/page.py
vendored
@ -2,7 +2,6 @@ import abc
|
||||
import io
|
||||
import itertools
|
||||
import random
|
||||
import string
|
||||
import sys
|
||||
import textwrap
|
||||
from copy import deepcopy
|
||||
@ -12,11 +11,9 @@ from math import sqrt
|
||||
from pathlib import Path
|
||||
from typing import Tuple, Iterable, List
|
||||
|
||||
import albumentations as A
|
||||
import blend_modes
|
||||
import cv2 as cv
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from PIL import Image, ImageOps, ImageFont, ImageDraw, ImageEnhance
|
||||
from PIL.Image import Transpose
|
||||
@ -24,13 +21,9 @@ from faker import Faker
|
||||
from loguru import logger
|
||||
from matplotlib import pyplot as plt
|
||||
from matplotlib.colors import ListedColormap
|
||||
from tabulate import tabulate
|
||||
|
||||
from cv_analysis.table_parsing import isolate_vertical_and_horizontal_components
|
||||
from cv_analysis.utils import star, rconj, conj
|
||||
from cv_analysis.utils.common import normalize_to_gray_scale
|
||||
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
|
||||
from cv_analysis.utils.drawing import draw_rectangles
|
||||
from cv_analysis.utils.merging import merge_related_rectangles
|
||||
from cv_analysis.utils.postprocessing import remove_overlapping, remove_included
|
||||
from cv_analysis.utils.spacial import area
|
||||
@ -39,78 +32,10 @@ logger.remove()
|
||||
logger.add(sys.stderr, level="INFO")
|
||||
|
||||
random_seed = random.randint(0, 2**32 - 1)
|
||||
# random_seed = 3896311122
|
||||
# random_seed = 1986343479
|
||||
|
||||
# random_seed = 273244862 # empty large table
|
||||
# random_seed = 3717442900
|
||||
# random_seed = 2508340737
|
||||
|
||||
|
||||
# random_seed = 2212357755
|
||||
# random_seed = 3400335399
|
||||
|
||||
random_seed = 2973413116
|
||||
|
||||
rnd = random.Random(random_seed)
|
||||
logger.info(f"Random seed: {random_seed}")
|
||||
|
||||
#
|
||||
# transform = A.Compose(
|
||||
# [
|
||||
# # geometric transforms
|
||||
# A.HorizontalFlip(p=0.2),
|
||||
# A.RandomRotate90(p=0.2),
|
||||
# A.VerticalFlip(p=0.2),
|
||||
# # brightness and contrast transforms
|
||||
# A.OneOf(
|
||||
# [
|
||||
# A.RandomGamma(p=0.5),
|
||||
# A.RandomBrightnessContrast(p=0.5),
|
||||
# ],
|
||||
# p=0.5,
|
||||
# ),
|
||||
# # noise transforms
|
||||
# A.SomeOf(
|
||||
# [
|
||||
# A.Emboss(p=0.05),
|
||||
# A.ImageCompression(p=0.05),
|
||||
# A.PixelDropout(p=0.05),
|
||||
# ],
|
||||
# p=0.5,
|
||||
# n=2,
|
||||
# ),
|
||||
# # color transforms
|
||||
# A.SomeOf(
|
||||
# [
|
||||
# A.ColorJitter(p=1),
|
||||
# A.RGBShift(p=1, r_shift_limit=0.1, g_shift_limit=0.1, b_shift_limit=0.1),
|
||||
# A.ChannelShuffle(p=1),
|
||||
# ],
|
||||
# p=0.5,
|
||||
# n=3, # 3 => all
|
||||
# ),
|
||||
# # blurring and sharpening transforms
|
||||
# A.OneOf(
|
||||
# [
|
||||
# A.GaussianBlur(p=0.05),
|
||||
# A.MotionBlur(p=0.05, blur_limit=21),
|
||||
# A.Sharpen(p=0.05),
|
||||
# ],
|
||||
# p=0.5,
|
||||
# ),
|
||||
# # environmental transforms
|
||||
# A.OneOf(
|
||||
# [
|
||||
# A.RandomRain(p=0.2, rain_type="drizzle"),
|
||||
# A.RandomFog(p=0.2, fog_coef_upper=0.4),
|
||||
# A.RandomSnow(p=0.2),
|
||||
# ],
|
||||
# p=0.5,
|
||||
# ),
|
||||
# ],
|
||||
# p=0.5,
|
||||
# )
|
||||
from funcy import (
|
||||
juxt,
|
||||
compose,
|
||||
@ -138,46 +63,9 @@ from funcy import (
|
||||
|
||||
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
|
||||
|
||||
# transform = A.Compose(
|
||||
# [
|
||||
# # brightness and contrast transforms
|
||||
# A.OneOf(
|
||||
# [
|
||||
# A.RandomGamma(p=0.2),
|
||||
# A.RandomBrightnessContrast(p=0.2, brightness_limit=0.05, contrast_limit=0.05),
|
||||
# ],
|
||||
# p=0.5,
|
||||
# ),
|
||||
# # color transforms
|
||||
# A.SomeOf(
|
||||
# [
|
||||
# A.ColorJitter(p=1),
|
||||
# A.RGBShift(p=1, r_shift_limit=0.3, g_shift_limit=0.3, b_shift_limit=0.3),
|
||||
# A.ChannelShuffle(p=1),
|
||||
# ],
|
||||
# p=1.0,
|
||||
# n=3, # 3 => all
|
||||
# ),
|
||||
# # # blurring and sharpening transforms
|
||||
# # A.OneOf(
|
||||
# # [
|
||||
# # A.GaussianBlur(p=0.05),
|
||||
# # A.MotionBlur(p=0.05, blur_limit=21),
|
||||
# # A.Sharpen(p=0.05),
|
||||
# # ],
|
||||
# # p=0.0,
|
||||
# # ),
|
||||
# ]
|
||||
# )
|
||||
from cv_analysis.utils.display import show_image
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
|
||||
transform = A.Compose(
|
||||
[
|
||||
# A.ColorJitter(p=1),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
Color = Tuple[int, int, int]
|
||||
|
||||
@ -421,9 +309,6 @@ def pad_image_to_size(image: Image, size: Tuple[int, int]) -> Image:
|
||||
if image.size[0] > size[0] or image.size[1] > size[1]:
|
||||
raise ValueError(f"Image size {image.size} is larger than target size {size}.")
|
||||
|
||||
# get average alpha value from image
|
||||
# alpha = int(np.mean(np.array(image.split()[-1]))) or 255
|
||||
# padded = Image.new(image.mode, size, color=(255, 255, 255, alpha))
|
||||
padded = Image.new(image.mode, size, color=255)
|
||||
|
||||
pasting_coords = compute_pasting_coordinates(image, padded)
|
||||
@ -484,15 +369,6 @@ def page_with_translucent_content(
|
||||
texture = random_flip(texture)
|
||||
texture = texture_fn(texture)
|
||||
|
||||
########## A
|
||||
# page_content = multiply_alpha_where_alpha_channel_is_nonzero(page_content, factor=0.6)
|
||||
# page = superimpose_texture_with_transparency(texture, page_content, crop_to_content=False)
|
||||
########## B
|
||||
# texture.putalpha(255)
|
||||
# # texture.show()
|
||||
# page_content.show()
|
||||
# page = blend(*map(np.array, (page_content, texture)))
|
||||
########## C
|
||||
texture.putalpha(255)
|
||||
page_content.putalpha(255)
|
||||
factor = 1.2
|
||||
@ -509,7 +385,6 @@ def page_with_translucent_content(
|
||||
),
|
||||
opacity=1,
|
||||
).astype(np.uint8)
|
||||
##########
|
||||
|
||||
return page, boxes
|
||||
|
||||
@ -533,108 +408,6 @@ def to_array(image: Image) -> np.ndarray:
|
||||
return np.array(image).astype(np.float32)
|
||||
|
||||
|
||||
def provide_image_format(required_format):
    """Create a decorator that hands the wrapped function its image in `required_format`.

    The decorated callable converts its first positional argument with the
    converter selected by `required_format` before calling the wrapped
    function. If `get_image_format` reports that the caller's image was in a
    different format, the result is passed through the opposite converter
    before being returned, so callers get back the kind of image they passed.

    Args:
        required_format: Either "array" or "pil".

    Returns:
        A decorator for functions whose first positional parameter is an image.

    Raises:
        KeyError: When the decorator is applied and `required_format` is
            neither "array" nor "pil".
    """
    # Local import so this block does not depend on the file-level import list.
    import functools

    def decorator(fn):
        # Resolved once, when the decorator is applied, not on every call.
        converter = {
            "array": normalize_image_format_to_array,
            "pil": normalize_image_format_to_pil,
        }[required_format]

        back_converter = {
            "array": normalize_image_format_to_pil,
            "pil": normalize_image_format_to_array,
        }[required_format]

        @functools.wraps(fn)  # keep the wrapped function's name and docstring
        def wrapper(image, *args, **kwargs):
            ret = fn(converter(image), *args, **kwargs)

            # Convert back only if the caller's image had to be converted.
            if get_image_format(image) != required_format:
                ret = back_converter(ret)

            return ret

        return wrapper

    return decorator
|
||||
|
||||
|
||||
@provide_image_format("array")
def set_alpha_where_color_channels_are_nonzero(image: np.ndarray, alpha: int) -> np.ndarray:
    """Return a copy of `image` with alpha set to `alpha` wherever any non-alpha channel is positive.

    The last channel is treated as alpha; all preceding channels are treated as
    colour. Pixels whose colour channels are all non-positive keep their alpha.
    The input array is not modified.

    Args:
        image: Array with three dimensions and four channels on the last axis.
        alpha: Replacement alpha value, between 0 and 255 inclusive.

    Returns:
        The modified copy.
    """
    assert image.ndim == 3
    assert image.shape[-1] == 4
    assert 0 <= alpha <= 255

    result = image.copy()
    # True for every pixel that has at least one positive colour channel.
    has_color = (result[..., :-1] > 0).any(axis=-1)
    result[..., -1] = np.where(has_color, alpha, result[..., -1])
    return result
|
||||
|
||||
|
||||
@provide_image_format("array")
def multiply_alpha_where_alpha_channel_is_nonzero(image: np.ndarray, factor: float) -> np.ndarray:
    """Return a float32 copy of `image` whose positive alpha values are multiplied by `factor`.

    The last channel is treated as alpha. After scaling, alpha is clipped to
    the range 0..255. The input array is not modified.

    Args:
        image: Array with three dimensions and four channels on the last axis.
        factor: Multiplier applied to every alpha value greater than zero.

    Returns:
        The scaled copy, as float32.
    """
    assert image.ndim == 3
    assert image.shape[-1] == 4

    scaled = image.copy().astype(np.float32)

    # Basic slicing yields a view, so writes below land in `scaled`.
    alpha_channel = scaled[..., -1]
    alpha_channel[...] = np.where(alpha_channel > 0, alpha_channel * factor, alpha_channel)
    np.clip(alpha_channel, 0, 255, out=alpha_channel)

    # These bounds are checked over all four channels, not only alpha.
    assert scaled.max() <= 255
    assert scaled.min() >= 0
    return scaled
|
||||
|
||||
|
||||
@provide_image_format("array")
def set_alpha_where_alpha_channel_is_nonzero(image: np.ndarray, alpha: int) -> np.ndarray:
    """Return a copy of `image` with every positive alpha value replaced by `alpha`.

    The last channel is treated as alpha. Pixels whose alpha is not greater
    than zero are left as they are. The input array is not modified.

    Args:
        image: Array with three dimensions and four channels on the last axis.
        alpha: Replacement alpha value, between 0 and 255 inclusive.

    Returns:
        The modified copy.
    """
    assert image.ndim == 3
    assert image.shape[-1] == 4
    assert 0 <= alpha <= 255

    result = image.copy()
    current_alpha = result[..., -1]
    result[..., -1] = np.where(current_alpha > 0, alpha, current_alpha)
    return result
|
||||
|
||||
|
||||
def get_image_format(image):
    """Classify `image` as "array" (numpy array) or "pil" (PIL image).

    Raises:
        ValueError: If `image` is neither an `np.ndarray` nor an `Image.Image`.
    """
    if isinstance(image, np.ndarray):
        return "array"

    if isinstance(image, Image.Image):
        return "pil"

    raise ValueError(f"Unknown image format: {type(image)}")
|
||||
|
||||
|
||||
def blend(a: np.ndarray, b: np.ndarray):
    """Blend two images whose values lie in 0..255 and return a uint8 array.

    Both inputs are scaled to 0..1. Where `a` is below 0.5 the result is
    `2 * a * b`; elsewhere it is `1 - 2 * (1 - a) * (1 - b)`. The result is
    scaled back to 0..255 and truncated to uint8.

    Reference: https://stackoverflow.com/a/52143032
    """
    # Inputs must already be in the 8-bit value range.
    assert a.max() <= 255
    assert a.min() >= 0

    assert b.max() <= 255
    assert b.min() >= 0

    base = a.astype(float) / 255
    layer = b.astype(float) / 255

    upper_half = base >= 0.5  # pixels where `a` is at least 0.5
    lower_half = ~upper_half

    darkened = 2 * base * layer
    lightened = 1 - 2 * (1 - base) * (1 - layer)

    # Fill the output from whichever formula applies to each pixel.
    blended = np.zeros_like(base)
    blended[lower_half] = darkened[lower_half]
    blended[upper_half] = lightened[upper_half]

    assert blended.max() <= 1
    assert blended.min() >= 0

    return (blended * 255).astype(np.uint8)
|
||||
|
||||
|
||||
class ContentRectangle(Rectangle):
|
||||
def __init__(self, x1, y1, x2, y2, content=None):
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
@ -724,21 +497,11 @@ def split_into_figure_and_caption(rectangle: Rectangle):
|
||||
|
||||
def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
    """Build a `RandomPlot` over `rectangle`'s coordinates and let it generate its plot.

    The new block inherits `rectangle.content` when `rectangle` is a
    `ContentRectangle`; otherwise it starts with no content.
    """
    plot_block = RandomPlot(*rectangle.coords)

    # TODO: Refactor
    if isinstance(rectangle, ContentRectangle):
        plot_block.content = rectangle.content
    else:
        plot_block.content = None

    plot_block.generate_random_plot(rectangle)
    return plot_block
|
||||
|
||||
|
||||
def generate_random_table(rectangle: Rectangle) -> ContentRectangle:
    """Build a `RandomTable` over `rectangle`'s coordinates and let it generate its table.

    The new block inherits `rectangle.content` when `rectangle` is a
    `ContentRectangle` or `RandomContentRectangle`; otherwise it starts with
    no content.
    """
    table_block = RandomTable(*rectangle.coords)

    # TODO: Refactor
    carries_content = isinstance(rectangle, (ContentRectangle, RandomContentRectangle))
    table_block.content = rectangle.content if carries_content else None

    table_block.generate_random_table(rectangle)
    return table_block
|
||||
|
||||
|
||||
def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRectangle:
|
||||
block = RecursiveRandomTable(*rectangle.coords, **kwargs)
|
||||
if isinstance(rectangle, RecursiveRandomTable):
|
||||
@ -771,10 +534,6 @@ class RandomContentRectangle(ContentRectangle):
|
||||
|
||||
|
||||
class Size(Enum):
|
||||
# SMALL = sqrt(100**2)
|
||||
# MEDIUM = sqrt((100 * 3) ** 2)
|
||||
# LARGE = sqrt((100 * 10) ** 2)
|
||||
|
||||
SMALL = 120
|
||||
MEDIUM = 180
|
||||
LARGE = 300
|
||||
@ -983,8 +742,6 @@ class RecursiveRandomTable(RandomContentRectangle):
|
||||
c1.draw_top_border(width=1)
|
||||
c1.draw_bottom_border(width=1)
|
||||
|
||||
# self.content = superimpose_texture_with_transparency(c.content, self.content)
|
||||
|
||||
x1, y1, x2, y2 = self.coords
|
||||
c2 = Cell(x1, y1 + self.double_rule_width, x2, y2 - self.double_rule_width)
|
||||
c2.draw_top_border(width=1)
|
||||
@ -1136,65 +893,6 @@ def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int
|
||||
return x1, y1, x2, y2
|
||||
|
||||
|
||||
class RandomTable(RandomContentRectangle):
    """Content rectangle that fills itself with an image of a random ASCII table."""

    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)
        self.font = pick_random_mono_space_font_available_on_system(
            includes=("bold",),
            excludes=("italic", "oblique"),
        )

    def generate_random_table(self, rectangle: Rectangle):
        """Render a random ASCII table for `rectangle` and store it in `self.content`.

        If `self.content` is already set, the rendered table is combined with
        it via `superimpose_texture_with_transparency`; otherwise the rendered
        table becomes the content.

        Reference: https://stackoverflow.com/questions/35634238/how-to-save-a-pandas-dataframe-table-as-a-png
        """
        lines = self.generate_random_ascii_table(rectangle).split("\n")
        rendered = write_lines_to_image(lines, rectangle)
        self.join_lines(rendered)

        if self.content:
            self.content = superimpose_texture_with_transparency(self.content, rendered)
        else:
            self.content = rendered

    def generate_random_dataframe(self, rectangle: Rectangle):
        """Return a dataframe of random integers in 0..99 sized from `rectangle`.

        The row and column counts are the rectangle's height and width divided
        by the text height of the default PIL font; the column count is capped
        at the 26 uppercase ASCII letters, which serve as column names.
        """
        canvas = Image.new("RGB", (rectangle.width, rectangle.height), color="white")
        # Only the height component of the measured text size is used.
        line_height = ImageDraw.Draw(canvas).textsize("dummy", font=ImageFont.load_default())[1]

        n_rows = rectangle.height // line_height

        column_names = list(string.ascii_uppercase)
        n_cols = min(rectangle.width // line_height, len(column_names))

        # NOTE(review): draws from the global numpy RNG rather than the module's
        # seeded `rnd` - confirm whether that is intended.
        values = np.random.randint(0, 100, size=(n_rows, n_cols))
        return pd.DataFrame(values, columns=column_names[:n_cols])

    def join_lines(self, table: Image.Image):
        """Run the line-isolation pipeline on `table`.

        NOTE(review): the computed grid is neither returned nor stored, so
        this method currently has no visible effect on `table` - confirm.
        """
        gray = normalize_to_gray_scale(normalize_image_format_to_array(table))
        isolate_vertical_and_horizontal_components(gray)

    def generate_random_ascii_table(self, rectangle: Rectangle):
        """Return a random dataframe for `rectangle` formatted as text by `tabulate`."""
        frame = self.generate_random_dataframe(rectangle)
        # Only the "grid" format is enabled; the draw still goes through `rnd`.
        chosen_format = rnd.choice(["grid"])
        return tabulate(frame, headers="keys", tablefmt=chosen_format)
|
||||
|
||||
|
||||
def dump_plt_to_image(rectangle):
|
||||
buf = io.BytesIO()
|
||||
plt.savefig(buf, format="png", transparent=True)
|
||||
@ -1685,7 +1383,6 @@ def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_
|
||||
|
||||
|
||||
def paste_content(page, content_box: ContentRectangle):
    """Paste `content_box.content` onto `page` at the box's top-left corner and return `page`.

    The content image is also passed as the mask argument of `page.paste`.
    The content must be in "RGBA" mode. `page` is modified in place.
    """
    content = content_box.content
    assert content.mode == "RGBA"

    top_left = (content_box.x1, content_box.y1)
    page.paste(content, top_left, content)
    return page
|
||||
@ -1719,9 +1416,6 @@ class PagePartitioner(abc.ABC):
|
||||
|
||||
box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin)
|
||||
boxes = lflatten(self.generate_content_boxes(box))
|
||||
# boxes = self.drop_small_boxes(boxes, *page.size)
|
||||
# boxes = merge_related_rectangles(boxes)
|
||||
# boxes = list(boxes)
|
||||
return boxes
|
||||
|
||||
@abc.abstractmethod
|
||||
@ -1765,9 +1459,6 @@ class RandomPagePartitioner(PagePartitioner):
|
||||
class TwoColumnPagePartitioner(PagePartitioner):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
# self.recursive_margin_percentage = 0.1
|
||||
# self.left_margin_percentage = 0.1
|
||||
# self.right_margin_percentage = 0.1
|
||||
self.max_recursion_depth = 3
|
||||
|
||||
def generate_content_boxes(self, box: Rectangle, depth=0):
|
||||
@ -1819,55 +1510,5 @@ def drop_small_boxes(boxes: Iterable[Rectangle], page_width, page_height, min_pe
|
||||
|
||||
|
||||
def draw_boxes(page: Image, boxes: Iterable[Rectangle]):
    """Draw `boxes` on `page` as unfilled, annotated rectangles and display the result."""
    annotated = draw_rectangles(page, boxes, filled=False, annotate=True)
    show_image(annotated, backend="pil")
|
||||
|
||||
|
||||
# class RandomPageNumber(ContentRectangle):
|
||||
# def __init__(self, *args, **kwargs):
|
||||
# super().__init__(*args, **kwargs)
|
||||
# self.page_number = random.randint(1, 1000)
|
||||
# self.margin_distance_percentage = 0.05
|
||||
# self.margin_distance_x = int(self.width * self.margin_distance_percentage)
|
||||
# self.margin_distance_y = int(self.height * self.margin_distance_percentage)
|
||||
#
|
||||
# self.location_coordinates = self.location_to_coordinates(self.pick_location())
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return f"PageNumber({self.page_number}, {super().__repr__()})"
|
||||
#
|
||||
# def generate_random_page_number(self, rectangle: Rectangle):
|
||||
# logger.info(f"Generating random page number for {rectangle}")
|
||||
# self.add_page_number()
|
||||
#
|
||||
# def add_page_number(self):
|
||||
# page_number_image = self.get_page_number_image()
|
||||
# self.content.paste(page_number_image, self.location_coordinates, page_number_image)
|
||||
#
|
||||
# def pick_location(self):
|
||||
# return rnd.choice(["top_left", "top_right", "bottom_left", "bottom_right", "center_top", "center_bottom"])
|
||||
#
|
||||
# def location_to_coordinates(self, locations: str):
|
||||
# if locations == "top_left":
|
||||
# return self.x1 + self.margin_distance_x, self.y1 + self.margin_distance_y
|
||||
# elif locations == "top_right":
|
||||
# return self.x2 - self.margin_distance_x, self.y1 + self.margin_distance_y
|
||||
# elif locations == "bottom_left":
|
||||
# return self.x1 + self.margin_distance_x, self.y2 - self.margin_distance_y
|
||||
# elif locations == "bottom_right":
|
||||
# return self.x2 - self.margin_distance_x, self.y2 - self.margin_distance_y
|
||||
# elif locations == "center_top":
|
||||
# return self.x1 + self.width // 2, self.y1 + self.margin_distance_y
|
||||
# elif locations == "center_bottom":
|
||||
# return self.x1 + self.width // 2, self.y2 - self.margin_distance_y
|
||||
# else:
|
||||
# raise ValueError(f"Unknown location: {locations}")
|
||||
#
|
||||
# def get_page_number_image(self):
|
||||
# font = pick_random_mono_space_font_available_on_system(excludes=("italic", "oblique"))
|
||||
# page_number_image = Image.new("RGBA", (100, 100), (255, 255, 255, 0))
|
||||
#
|
||||
# draw = ImageDraw.Draw(page_number_image)
|
||||
# draw.text((0, 0), str(self.page_number), font=font, fill=(0, 0, 0, 255))
|
||||
#
|
||||
# return page_number_image
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user