From d5d67cb06409448d03d366f380c41020a097656d Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Wed, 11 Jan 2023 12:17:07 +0100 Subject: [PATCH] Fix image format (RGB/A, float/uint8, [0, 1/255]) issues --- cv_analysis/utils/display.py | 56 ++++++---- test/fixtures/page_generation/page.py | 152 ++++++++++++++++---------- 2 files changed, 126 insertions(+), 82 deletions(-) diff --git a/cv_analysis/utils/display.py b/cv_analysis/utils/display.py index ca6781f..ea286e3 100644 --- a/cv_analysis/utils/display.py +++ b/cv_analysis/utils/display.py @@ -1,38 +1,48 @@ import cv2 +import numpy as np from PIL import Image +from PIL.Image import Image as Image_t from matplotlib import pyplot as plt -def show_image_cv2(image, maxdim=700): - h, w, c = image.shape - maxhw = max(h, w) - if maxhw > maxdim: - ratio = maxdim / maxhw - h = int(h * ratio) - w = int(w * ratio) - img = cv2.resize(image, (h, w)) - cv2.imshow("", img) - cv2.waitKey(0) - cv2.destroyAllWindows() - - -def show_image_mpl(image): - fig, ax = plt.subplots(1, 1) - fig.set_size_inches(20, 20) - ax.imshow(image, cmap="gray") - plt.show() - - -def show_image(image, backend="mpl"): +def show_image(image, backend="mpl", **kwargs): if backend == "mpl": - show_image_mpl(image) + show_image_mpl(image, **kwargs) elif backend == "cv2": - show_image_cv2(image) + show_image_cv2(image, **kwargs) elif backend == "pil": Image.fromarray(image).show() else: raise ValueError(f"Unknown backend: {backend}") +def show_image_cv2(image, maxdim=700, **kwargs): + h, w, c = image.shape + maxhw = max(h, w) + if maxhw > maxdim: + ratio = maxdim / maxhw + h = int(h * ratio) + w = int(w * ratio) + + img = cv2.resize(image, (h, w)) + cv2.imshow("", img) + cv2.waitKey(0) + cv2.destroyAllWindows() + + +def show_image_mpl(image, **kwargs): + if isinstance(image, Image_t): + # noinspection PyTypeChecker + image = np.array(image) + # noinspection PyArgumentList + assert image.max() <= 255 + fig, ax = plt.subplots(1, 1) + fig.set_size_inches(20, 20) + assert image.dtype == np.uint8 + ax.imshow(image, cmap="gray") + ax.title.set_text(kwargs.get("title", "")) + plt.show() + + def save_image(image, path): cv2.imwrite(path, image) diff --git a/test/fixtures/page_generation/page.py b/test/fixtures/page_generation/page.py index 659c56d..1117231 100644 --- a/test/fixtures/page_generation/page.py +++ b/test/fixtures/page_generation/page.py @@ -64,35 +64,42 @@ from PIL import Image # ) +# transform = A.Compose( +# [ +# # brightness and contrast transforms +# A.OneOf( +# [ +# A.RandomGamma(p=0.2), +# A.RandomBrightnessContrast(p=0.2, brightness_limit=0.05, contrast_limit=0.05), +# ], +# p=0.5, +# ), +# # color transforms +# A.SomeOf( +# [ +# A.ColorJitter(p=1), +# A.RGBShift(p=1, r_shift_limit=0.3, g_shift_limit=0.3, b_shift_limit=0.3), +# A.ChannelShuffle(p=1), +# ], +# p=1.0, +# n=3, # 3 => all +# ), +# # # blurring and sharpening transforms +# # A.OneOf( +# # [ +# # A.GaussianBlur(p=0.05), +# # A.MotionBlur(p=0.05, blur_limit=21), +# # A.Sharpen(p=0.05), +# # ], +# # p=0.0, +# # ), +# ] +# ) +from cv_analysis.utils.display import show_image + transform = A.Compose( [ - # # brightness and contrast transforms - A.OneOf( - [ - A.RandomGamma(p=0.2), - A.RandomBrightnessContrast(p=0.2, brightness_limit=0.05, contrast_limit=0.05), - ], - p=0.5, - ), - # color transforms - A.SomeOf( - [ - A.ColorJitter(p=1), - A.RGBShift(p=1, r_shift_limit=0.3, g_shift_limit=0.3, b_shift_limit=0.3), - A.ChannelShuffle(p=1), - ], - p=1.0, - n=3, # 3 => all - ), - # # blurring and sharpening transforms - # A.OneOf( - # [ - # A.GaussianBlur(p=0.05), - # A.MotionBlur(p=0.05, blur_limit=21), - # A.Sharpen(p=0.05), - # ], - # p=0.0, - # ), + # A.ColorJitter(p=1), ] ) @@ -100,14 +107,19 @@ transform = A.Compose( Color = Tuple[int, int, int] -@pytest.fixture(params=["portrait", "landscape"]) +@pytest.fixture( + params=[ + "portrait", + # "landscape", + ] +) def orientation(request): return request.param @pytest.fixture( params=[ - 30, + # 30, 100, ] ) @@ -130,7 +142,13 @@ def color_name(request): return request.param -@pytest.fixture(params=["smooth", "coarse", "neutral"]) +@pytest.fixture( + params=[ + # "smooth", + # "coarse", + "neutral", + ] +) def texture_name(request): return request.param @@ -140,7 +158,7 @@ def color(color_name): return { "brown": (0.5, 0.3, 0.2), "yellow": (0.5, 0.5, 0.0), - "sepia": (0.4, 0.3, 0.2), + "sepia": (173, 155, 109), "gray": (0.3, 0.3, 0.3), "white": (0.0, 0.0, 0.0), "light_red": (0.5, 0.0, 0.0), @@ -160,31 +178,62 @@ def texture_fn(texture_name, size): @pytest.fixture def texture(texture_fn, size, color): - noise = np.random.rand(*size) * 255 - noise = texture_fn(noise) - noise = color_shift_noise(noise, color) - noise = zero_out_below_threshold(noise, 0.1) - return Image.fromarray(noise.astype("uint8")) + noise_arr = np.random.rand(*size) * 255 + noise_arr = texture_fn(noise_arr) + noise_arr = color_shift_noise(noise_arr, color) + + show_image(noise_arr, backend="mpl", title="noise1") + noise_arr = zero_out_below_threshold(noise_arr, 0.4) + assert noise_arr.max() <= 255 + + show_image(noise_arr, backend="mpl", title="noise2") + noise_img = Image.fromarray(noise_arr) + show_image(noise_img, backend="mpl", title="noise3") + # noinspection PyTypeChecker + assert np.equal(noise_arr, np.array(noise_img)).all() + return noise_img def color_shift_noise(noise: np.ndarray, color: Color): """Creates a 3-tensor from a 2-tensor by stacking the 2-tensor three times weighted by the color tuple.""" assert noise.ndim == 2 assert isinstance(color, tuple) + assert max(color) <= 255 + assert noise.max() <= 255 + color = np.array(color) weights = color / color.sum() - alpha_channel = np.zeros(noise.shape) - colored_noise = np.stack([noise * weight for weight in weights] + [alpha_channel], axis=-1) + assert max(weights) <= 1 + + alpha_channel = np.ones(noise.shape) * 255 + colored_noise = np.stack([noise * weight for weight in weights] + [alpha_channel], axis=-1).astype(np.uint8) + assert colored_noise.shape == (*noise.shape, 4) + return colored_noise +def zero_out_below_threshold(texture, threshold): + assert texture.max() <= 255 + + threshold = int(texture[:, :, 0:3].sum(axis=2).max() * threshold) + + # texture[:, :, 3] = 100 + + threshold_mask = texture[:, :, 0:3].sum(axis=2) >= threshold + texture[~threshold_mask] = [0, 0, 0, 0] + + return texture + + @pytest.fixture def size(dpi, orientation): if orientation == "portrait": size = (8.5 * dpi, 11 * dpi) - else: + elif orientation == "landscape": size = (11 * dpi, 8.5 * dpi) + else: + raise ValueError(f"Unknown orientation: {orientation}") size = tuple(map(int, size)) return size @@ -192,33 +241,18 @@ def size(dpi, orientation): @pytest.fixture def blank_page(size, texture) -> np.ndarray: """Creates a blank page with a given orientation and dpi.""" - page = Image.fromarray(np.ones((*size, 3), dtype="uint8") * 255) + page = Image.fromarray(np.zeros((*size, 4), dtype=np.uint8) * 255) page = superimpose_texture_with_transparency(page, texture) - page = np.array(page) - page = transform(image=page)["image"] + show_image(page, backend="mpl", title="page") + # page = transform(image=page)["image"] return page def superimpose_texture_with_transparency(page: Image, texture: Image) -> Image: """Superimposes a noise image with transparency onto a page image.""" - assert page.mode == "RGB" + assert page.mode == "RGBA" assert texture.mode == "RGBA" assert page.size == texture.size - texture = texture.convert("RGBA") + show_image(texture, backend="mpl", title="texture") page.paste(texture, (0, 0), texture) return page - - -def zero_out_below_threshold(texture, threshold): - - threshold = int(255 * threshold) - - texture[:, :, 0] = (texture[:, :, 0] / texture[:, :, 0].max()) * 255 - texture[:, :, 1] = (texture[:, :, 1] / texture[:, :, 1].max()) * 255 - texture[:, :, 2] = (texture[:, :, 2] / texture[:, :, 2].max()) * 255 - texture[:, :, 3] = 25 - - threshold_mask = texture[:, :, 0] >= threshold - texture[~threshold_mask] = [0, 0, 0, 0] - - return texture