image-classification-service/test/unit_tests/image_extractor_test.py
2022-04-13 17:31:33 +02:00

79 lines
2.6 KiB
Python

import random
from operator import itemgetter
import fitz
import fpdf
import numpy as np
import pytest
from PIL import Image
from funcy import first, rest
from image_prediction.estimator.preprocessor.utils import images_to_batch_tensor
from image_prediction.extraction import extract_images_from_pdf
from image_prediction.image_extractor.extractor import ImageMetadataPair
from image_prediction.image_extractor.extractors.parsable import extract_pages, get_image_infos, has_alpha_channel
from image_prediction.info import Info
from test.conftest import add_image, pdf_stream
@pytest.mark.parametrize("extractor_type", ["mock"])
@pytest.mark.parametrize("batch_size", [1, 2, 16])
def test_image_extractor_mock(image_extractor, images):
    """Check that the mock extractor hands back the input images unchanged.

    ``image_extractor(images)`` is expected to yield ``(image, metadata)``
    pairs; only the image half is compared against the ``images`` fixture.
    ``extractor_type`` and ``batch_size`` are not arguments of this test, so
    they are presumably consumed by fixtures -- confirm in conftest.
    """
    extracted_pairs = image_extractor(images)
    # Transpose the pairs into an image column and a metadata column.
    images_extracted, _metadata = (list(column) for column in zip(*extracted_pairs))

    assert images_extracted == images
@pytest.mark.parametrize("extractor_type", ["parsable_pdf", "default"])
@pytest.mark.parametrize("input_size", [{"depth": 3, "width": 170, "height": 220}], indirect=["input_size"])
@pytest.mark.parametrize("alpha", [False, True])
def test_parsable_pdf_image_extractor(image_extractor, pdf, images, metadata, input_size, alpha):
    """Check images and metadata extracted from a PDF against the fixtures.

    ``extract_images_from_pdf`` is unpacked into an image sequence and a
    metadata sequence. The metadata must always equal the ``metadata``
    fixture; the pixel comparison only runs when ``alpha`` is false.
    """
    extraction = extract_images_from_pdf(pdf, image_extractor)
    images_extracted, metadata_extracted = (list(part) for part in extraction)

    # NOTE(review): pixel data is not compared for alpha images -- presumably
    # extraction does not reproduce them exactly; confirm the reason.
    if not alpha:
        actual_batch = images_to_batch_tensor(images_extracted)
        expected_batch = images_to_batch_tensor(images)
        assert np.allclose(actual_batch, expected_batch)

    assert metadata_extracted == metadata
@pytest.mark.parametrize("batch_size", [1, 2, 16])
def test_extract_pages(pdf):
    """Check that ``extract_pages`` yields one ``fitz.Page`` per requested index.

    A random half-open page range ``[i, j)`` is drawn from the document. For a
    document with fewer than two pages the range is empty (``i == j == 0``)
    and the assertions hold vacuously.

    ``batch_size`` is not an argument of this test, so it is presumably
    consumed by a fixture behind ``pdf`` -- confirm in conftest.
    """
    doc = fitz.Document(stream=pdf)
    # Close the document even when an assertion fails, matching the explicit
    # ``doc.close()`` used by the other test in this module.
    try:
        max_index = max(0, doc.page_count - 1)

        # Pick i < j <= max_index when there are at least two pages, so the
        # resulting range is never empty in that case.
        i = random.randint(0, max(0, max_index - 1))
        j = random.randint(i + 1, max_index) if max_index > 0 else 0
        page_range = range(i, j)

        pages = list(extract_pages(doc, page_range))

        assert all(isinstance(p, fitz.Page) for p in pages)
        assert len(pages) == len(page_range)
    finally:
        doc.close()
@pytest.mark.parametrize("suffix", ["gif", "png", "jpeg"])
@pytest.mark.parametrize("mode", ["RGB", "RGBA"])
def test_has_alpha_channel(base_patch_metadata, suffix, mode):
    """Check ``has_alpha_channel`` on a PDF containing a single generated image.

    A uniformly coloured image of the size given by ``base_patch_metadata`` is
    added to a fresh PDF with the requested file ``suffix``. The result of
    ``has_alpha_channel`` for that image's xref must be truthy for RGBA images
    and falsy for RGB images, and the page must contain exactly one image.
    """
    # The JPEG case always uses an RGB image, whatever ``mode`` was requested
    # (presumably because JPEG cannot carry an alpha channel -- confirm).
    mode = "RGB" if suffix == "jpeg" else mode

    pdf = fpdf.FPDF(unit="pt")
    size = itemgetter(Info.WIDTH, Info.HEIGHT)(base_patch_metadata)
    image = Image.new(mode, size, color=(10, 10, 10))
    add_image(pdf, ImageMetadataPair(image, base_patch_metadata), suffix=suffix)

    doc = fitz.Document(stream=pdf_stream(pdf))
    # Close the document even when one of the assertions below fails.
    try:
        page = first(doc)

        # Materialise the xrefs. On a lazy ``map`` iterator ``first`` consumes
        # one element and ``rest`` then skips another, so the single-image
        # check below would silently accept a second image on the page.
        xrefs = list(map(itemgetter("xref"), get_image_infos(page)))

        result = has_alpha_channel(doc, first(xrefs))

        if mode == "RGBA":
            assert result
        if mode == "RGB":
            assert not result

        # Exactly one image was added, so no further xrefs may be present.
        assert not list(rest(xrefs))
    finally:
        doc.close()