diff --git a/test/conftest.py b/test/conftest.py index 465a66f..95cd2b9 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -3,33 +3,20 @@ import logging import os import random import string -import tempfile from functools import partial from itertools import starmap from operator import itemgetter -from typing import Iterable import fpdf import numpy as np import pytest -from PIL import Image -from frozendict import frozendict from funcy import rcompose, merge -from image_prediction.classifier.classifier import Classifier -from image_prediction.classifier.image_classifier import ImageClassifier -from image_prediction.estimator.adapter.adapter import EstimatorAdapter -from image_prediction.estimator.preprocessor.preprocessors.basic import BasicPreprocessor -from image_prediction.estimator.preprocessor.utils import image_to_normalized_tensor from image_prediction.exceptions import ( - UnknownEstimatorAdapter, - UnknownImageExtractor, UnknownDatabaseType, UnknownLabelFormat, ) from image_prediction.image_extractor.extractor import ImageMetadataPair -from image_prediction.image_extractor.extractors.mock import ImageExtractorMock -from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor from image_prediction.info import Info from image_prediction.label_mapper.mappers.numeric import IndexMapper from image_prediction.label_mapper.mappers.probability import ProbabilityMapper, ProbabilityMapperKeys @@ -39,8 +26,12 @@ from image_prediction.model_loader.loader import ModelLoader from image_prediction.model_loader.loaders.mlflow import MlflowConnector from image_prediction.pipeline import load_pipeline from image_prediction.redai_adapter.mlflow import MlflowModelReader -from image_prediction.redai_adapter.model import PredictionModelHandle from image_prediction.utils import get_logger +from test.utils.generation.image import array_to_image +from test.utils.generation.pdf import add_image, pdf_stream + +pytest_plugins = ['test.utils.model'] + @pytest.fixture(autouse=True) @@ -52,46 +43,6 @@ def mute_logger(): logger.setLevel(level) -@pytest.fixture -def image_extractor(extractor_type): - if extractor_type == "mock": - return ImageExtractorMock() - elif extractor_type == "parsable_pdf": - return ParsablePDFImageExtractor() - elif extractor_type == "default": - return None - else: - raise UnknownImageExtractor(f"No image extractor for type {extractor_type} was specified.") - - -@pytest.fixture -def image_classifier(classifier, monkeypatch, batch_of_expected_string_labels): - return ImageClassifier(classifier, preprocessor=BasicPreprocessor()) - - -@pytest.fixture -def classifier(estimator_adapter, label_mapper): - classifier = Classifier(estimator_adapter, label_mapper) - return classifier - - -@pytest.fixture -def estimator_mock(): - class EstimatorMock: - @staticmethod - def predict(batch): - return [None for _ in batch] - - @staticmethod - def predict_proba(batch): - return [None for _ in batch] - - def __call__(self, batch): - return self.predict(batch) - - return EstimatorMock() - - @pytest.fixture def label_mapper(label_format, classes): if label_format == "index": @@ -129,51 +80,6 @@ def expected_predictions(label_format, batch_of_expected_numeric_labels, batch_o raise UnknownLabelFormat(f"No label mapper for label format {label_format} was specified.") -@pytest.fixture -def estimator_adapter( - estimator_type, estimator_mock, keras_model, model_handle_mock, output_batch_generator, monkeypatch -): - if estimator_type == "mock": - estimator_adapter = EstimatorAdapter(estimator_mock) - elif estimator_type == "keras": - estimator_adapter = EstimatorAdapter(keras_model) - elif estimator_type == "redai": - estimator_adapter = EstimatorAdapter(PredictionModelHandle(model_handle_mock)) - else: - raise UnknownEstimatorAdapter(f"No adapter for estimator type {estimator_type} was specified.") - - def mock_predict(batch): - # Run real predict function to test for mechanical issues, but return externally defined - # predictions to test the callers of the estimator adapter against the expected predictions - return [next(output_batch_generator) for _ in _predict(batch)] - - _predict = estimator_adapter.predict - monkeypatch.setattr(estimator_adapter, "predict", mock_predict) - - return estimator_adapter - - -@pytest.fixture -def keras_model(input_size): - import os - - os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - - import tensorflow as tf - - tf.keras.backend.set_image_data_format("channels_last") - - inputs = tf.keras.Input(shape=input_size) - conv = tf.keras.layers.Conv2D(3, 3) - dense = tf.keras.layers.Dense(10) - - outputs = tf.keras.layers.Dense(10)(dense(conv(inputs))) - model = tf.keras.Model(inputs=inputs, outputs=outputs) - model.compile() - - return model - - @pytest.fixture def images(input_batch): return list(map(array_to_image, input_batch)) @@ -205,21 +111,6 @@ def __input_size(request): return itemgetter("width", "height", "depth")(request.param) -def array_to_image(array): - assert np.all(array <= 1) - assert np.all(array >= 0) - - if array.shape[-1] == 3: - mode = "RGB" - elif array.shape[-1] == 4: - mode = "RGBA" - else: - raise ValueError(f"Unexpected number of channels {array.shape[-1]}. Expected 3 or 4.") - - # noinspection PyTypeChecker - return Image.fromarray(np.uint8(array * 255), mode=mode) - - @pytest.fixture def batch_of_expected_string_labels(batch_of_expected_numeric_labels, classes): return map_labels(batch_of_expected_numeric_labels, classes) @@ -340,44 +231,6 @@ def pdf(image_metadata_pairs): return pdf_stream(pdf) -def add_image(pdf, image_metadata_pair, suffix="png"): - while fewer_pages_then_required(image_metadata_pair.metadata[Info.PAGE_IDX], pdf): - pdf.add_page() - - add_image_to_last_page(pdf, image_metadata_pair, suffix=suffix) - - -def fewer_pages_then_required(page_idx, pdf): - return page_idx > pdf.page - 1 - - -def pdf_stream(pdf: fpdf.fpdf.FPDF): - return pdf.output(dest="S").encode("latin1") - - -def add_image_to_last_page(pdf: fpdf.fpdf.FPDF, image_metadata_pair, suffix): - image, metadata = image_metadata_pair - x, y, w, h = itemgetter(Info.X1, Info.Y1, Info.WIDTH, Info.HEIGHT)(metadata) - - with tempfile.NamedTemporaryFile(suffix=f".{suffix}") as temp_image: - image.save(temp_image.name) - pdf.image(temp_image.name, x=x, y=y, w=w, h=h, type=suffix) - - -@pytest.fixture -def model(): - class Model: - @staticmethod - def predict(*args): - return True - - @staticmethod - def predict_proba(*args): - return True - - return Model() - - @pytest.fixture def model_database_record_identifier(): return "".join(random.sample(string.ascii_letters, k=10)) @@ -429,24 +282,6 @@ def mlflow_reader(mlruns_dir): return MlflowModelReader(mlruns_dir) -@pytest.fixture -def model_handle_mock(estimator_mock): - class ModelHandleMock: - def __init__(self): - self.model = estimator_mock - - def prep_images(self, batch): - return [None for _ in batch] - - def predict(self, batch): - return [None for _ in batch] - - def predict_proba(self, batch): - return [None for _ in batch] - - return ModelHandleMock() - - @pytest.fixture def real_pdf(): with open(os.path.join(TEST_DATA_DIR, "f2dc689ca794fccb8cd38b95f2bf6ba9.pdf"), "rb") as f: @@ -465,10 +300,6 @@ def pipeline(): return pipeline -def transform_equal(a, b): - return (list(a) if isinstance(a, map) else a) == b - - def get_base_position_metadata(width, height, page_width, page_height): return { Info.WIDTH: width, @@ -504,23 +335,3 @@ def page_height(request): @pytest.fixture(params=[100, 310]) def page_width(request): return request.param - - -def random_single_color_image_from_metadata(metadata): - image = Image.new( - "RGB", (metadata[Info.WIDTH], metadata[Info.HEIGHT]), color=tuple(map(int, np.random.uniform(size=3) * 255)) - ) - return image - - -def gray_image_from_metadata(metadata): - image = Image.new("RGB", (metadata[Info.WIDTH], metadata[Info.HEIGHT]), color=(100, 100, 100)) - return image - - -def images_equal(im1: Image, im2: Image, **kwargs): - return np.allclose(image_to_normalized_tensor(im1), image_to_normalized_tensor(im2), **kwargs) - - -def metadata_equal(mdat1: Iterable, mdat2: Iterable): - return set(map(frozendict, mdat1)) == set(map(frozendict, mdat2)) diff --git a/test/fixtures/__init__.py b/test/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/unit_tests/compositor_test.py b/test/unit_tests/compositor_test.py index 0f1152b..cca34fc 100644 --- a/test/unit_tests/compositor_test.py +++ b/test/unit_tests/compositor_test.py @@ -4,7 +4,7 @@ from image_prediction.compositor.compositor import TransformerCompositor from image_prediction.formatter.formatters.camel_case import Snake2CamelCaseKeyFormatter from image_prediction.formatter.formatters.enum import EnumFormatter from image_prediction.formatter.formatters.identity import IdentityFormatter -from test.conftest import transform_equal +from test.utils.comparison import transform_equal def test_identity(metadata): diff --git a/test/unit_tests/coordinate_transformer_test.py b/test/unit_tests/coordinate_transformer_test.py index 18e69f7..935c95e 100644 --- a/test/unit_tests/coordinate_transformer_test.py +++ b/test/unit_tests/coordinate_transformer_test.py @@ -12,7 +12,10 @@ from image_prediction.info import Info from image_prediction.transformer.transformers.coordinate.fitz import FitzCoordinateTransformer from image_prediction.transformer.transformers.coordinate.fpdf import FPDFCoordinateTransformer from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCoordinateTransformer -from test.conftest import array_to_image, add_image, transform_equal, get_base_position_metadata +from test.conftest import get_base_position_metadata +from test.utils.generation.image import array_to_image +from test.utils.generation.pdf import add_image +from test.utils.comparison import transform_equal @pytest.mark.parametrize("coordinate_system", ["fpdf"]) diff --git a/test/unit_tests/image_extractor_test.py b/test/unit_tests/image_extractor_test.py index 5bc2b42..9c3d03d 100644 --- a/test/unit_tests/image_extractor_test.py +++ b/test/unit_tests/image_extractor_test.py @@ -11,7 +11,8 @@ from image_prediction.extraction import extract_images_from_pdf from image_prediction.image_extractor.extractor import ImageMetadataPair from image_prediction.image_extractor.extractors.parsable import extract_pages, get_image_infos, has_alpha_channel from image_prediction.info import Info -from test.conftest import add_image, pdf_stream, images_equal, metadata_equal +from test.utils.generation.pdf import add_image, pdf_stream +from test.utils.comparison import images_equal, metadata_equal, image_sets_equal @pytest.mark.parametrize("extractor_type", ["mock"]) @@ -27,7 +28,7 @@ def test_image_extractor_mock(image_extractor, images): def test_parsable_pdf_image_extractor(image_extractor, pdf, images, metadata, input_size, alpha): images_extracted, metadata_extracted = map(list, extract_images_from_pdf(pdf, image_extractor)) if not alpha: - all(any(images_equal(imex, im) for im in images) for imex in images_extracted) + assert image_sets_equal(images_extracted, images) assert metadata_equal(metadata_extracted, metadata) diff --git a/test/unit_tests/image_stitching_test.py b/test/unit_tests/image_stitching_test.py index 9af487e..677b4ee 100644 --- a/test/unit_tests/image_stitching_test.py +++ b/test/unit_tests/image_stitching_test.py @@ -30,12 +30,9 @@ from image_prediction.stitching.utils import ( make_coord_getter, make_length_getter, ) -from test.conftest import ( - add_image, - random_single_color_image_from_metadata, - gray_image_from_metadata, - images_equal, -) +from test.utils.generation.pdf import add_image +from test.utils.generation.image import random_single_color_image_from_metadata, gray_image_from_metadata +from test.utils.comparison import images_equal from test.utils.stitching import BoxSplitter x1_getter, y1_getter, x2_getter, y2_getter = map(make_coord_getter, ("x1", "y1", "x2", "y2")) diff --git a/test/utils/comparison.py b/test/utils/comparison.py new file mode 100644 index 0000000..daefd1b --- /dev/null +++ b/test/utils/comparison.py @@ -0,0 +1,23 @@ +from typing import Iterable + +import numpy as np +from PIL import Image +from frozendict import frozendict + +from image_prediction.estimator.preprocessor.utils import image_to_normalized_tensor + + +def transform_equal(a, b): + return (list(a) if isinstance(a, map) else a) == b + + +def images_equal(im1: Image, im2: Image, **kwargs): + return np.allclose(image_to_normalized_tensor(im1), image_to_normalized_tensor(im2), **kwargs) + + +def metadata_equal(mdat1: Iterable, mdat2: Iterable): + return set(map(frozendict, mdat1)) == set(map(frozendict, mdat2)) + + +def image_sets_equal(ims1, ims2): + return all(any(images_equal(im1, im2) for im2 in ims2) for im1 in ims1) diff --git a/test/utils/generation/__init__.py b/test/utils/generation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/generation/image.py b/test/utils/generation/image.py new file mode 100644 index 0000000..c588bf7 --- /dev/null +++ b/test/utils/generation/image.py @@ -0,0 +1,31 @@ +import numpy as np +from PIL import Image + +from image_prediction.info import Info + + +def random_single_color_image_from_metadata(metadata): + image = Image.new( + "RGB", (metadata[Info.WIDTH], metadata[Info.HEIGHT]), color=tuple(map(int, np.random.uniform(size=3) * 255)) + ) + return image + + +def gray_image_from_metadata(metadata): + image = Image.new("RGB", (metadata[Info.WIDTH], metadata[Info.HEIGHT]), color=(100, 100, 100)) + return image + + +def array_to_image(array): + assert np.all(array <= 1) + assert np.all(array >= 0) + + if array.shape[-1] == 3: + mode = "RGB" + elif array.shape[-1] == 4: + mode = "RGBA" + else: + raise ValueError(f"Unexpected number of channels {array.shape[-1]}. Expected 3 or 4.") + + # noinspection PyTypeChecker + return Image.fromarray(np.uint8(array * 255), mode=mode) diff --git a/test/utils/generation/pdf.py b/test/utils/generation/pdf.py new file mode 100644 index 0000000..852647e --- /dev/null +++ b/test/utils/generation/pdf.py @@ -0,0 +1,30 @@ +import tempfile +from operator import itemgetter + +import fpdf + +from image_prediction.info import Info + + +def add_image(pdf, image_metadata_pair, suffix="png"): + while fewer_pages_then_required(image_metadata_pair.metadata[Info.PAGE_IDX], pdf): + pdf.add_page() + + add_image_to_last_page(pdf, image_metadata_pair, suffix=suffix) + + +def fewer_pages_then_required(page_idx, pdf): + return page_idx > pdf.page - 1 + + +def pdf_stream(pdf: fpdf.fpdf.FPDF): + return pdf.output(dest="S").encode("latin1") + + +def add_image_to_last_page(pdf: fpdf.fpdf.FPDF, image_metadata_pair, suffix): + image, metadata = image_metadata_pair + x, y, w, h = itemgetter(Info.X1, Info.Y1, Info.WIDTH, Info.HEIGHT)(metadata) + + with tempfile.NamedTemporaryFile(suffix=f".{suffix}") as temp_image: + image.save(temp_image.name) + pdf.image(temp_image.name, x=x, y=y, w=w, h=h, type=suffix) diff --git a/test/utils/model.py b/test/utils/model.py new file mode 100644 index 0000000..dbeee5b --- /dev/null +++ b/test/utils/model.py @@ -0,0 +1,127 @@ +import pytest + +from image_prediction.classifier.classifier import Classifier +from image_prediction.classifier.image_classifier import ImageClassifier +from image_prediction.estimator.adapter.adapter import EstimatorAdapter +from image_prediction.estimator.preprocessor.preprocessors.basic import BasicPreprocessor +from image_prediction.exceptions import UnknownImageExtractor, UnknownEstimatorAdapter +from image_prediction.image_extractor.extractors.mock import ImageExtractorMock +from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor +from image_prediction.redai_adapter.model import PredictionModelHandle + + +@pytest.fixture +def estimator_mock(): + class EstimatorMock: + @staticmethod + def predict(batch): + return [None for _ in batch] + + @staticmethod + def predict_proba(batch): + return [None for _ in batch] + + def __call__(self, batch): + return self.predict(batch) + + return EstimatorMock() + + +@pytest.fixture +def image_extractor(extractor_type): + if extractor_type == "mock": + return ImageExtractorMock() + elif extractor_type == "parsable_pdf": + return ParsablePDFImageExtractor() + elif extractor_type == "default": + return None + else: + raise UnknownImageExtractor(f"No image extractor for type {extractor_type} was specified.") + + +@pytest.fixture +def image_classifier(classifier, monkeypatch, batch_of_expected_string_labels): + return ImageClassifier(classifier, preprocessor=BasicPreprocessor()) + + +@pytest.fixture +def classifier(estimator_adapter, label_mapper): + classifier = Classifier(estimator_adapter, label_mapper) + return classifier + + +@pytest.fixture +def estimator_adapter( + estimator_type, estimator_mock, keras_model, model_handle_mock, output_batch_generator, monkeypatch +): + if estimator_type == "mock": + estimator_adapter = EstimatorAdapter(estimator_mock) + elif estimator_type == "keras": + estimator_adapter = EstimatorAdapter(keras_model) + elif estimator_type == "redai": + estimator_adapter = EstimatorAdapter(PredictionModelHandle(model_handle_mock)) + else: + raise UnknownEstimatorAdapter(f"No adapter for estimator type {estimator_type} was specified.") + + def mock_predict(batch): + # Run real predict function to test for mechanical issues, but return externally defined + # predictions to test the callers of the estimator adapter against the expected predictions + return [next(output_batch_generator) for _ in _predict(batch)] + + _predict = estimator_adapter.predict + monkeypatch.setattr(estimator_adapter, "predict", mock_predict) + + return estimator_adapter + + +@pytest.fixture +def keras_model(input_size): + import os + + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + + import tensorflow as tf + + tf.keras.backend.set_image_data_format("channels_last") + + inputs = tf.keras.Input(shape=input_size) + conv = tf.keras.layers.Conv2D(3, 3) + dense = tf.keras.layers.Dense(10) + + outputs = tf.keras.layers.Dense(10)(dense(conv(inputs))) + model = tf.keras.Model(inputs=inputs, outputs=outputs) + model.compile() + + return model + + +@pytest.fixture +def model(): + class Model: + @staticmethod + def predict(*args): + return True + + @staticmethod + def predict_proba(*args): + return True + + return Model() + + +@pytest.fixture +def model_handle_mock(estimator_mock): + class ModelHandleMock: + def __init__(self): + self.model = estimator_mock + + def prep_images(self, batch): + return [None for _ in batch] + + def predict(self, batch): + return [None for _ in batch] + + def predict_proba(self, batch): + return [None for _ in batch] + + return ModelHandleMock() \ No newline at end of file diff --git a/test/utils/model_store.py b/test/utils/model_store.py new file mode 100644 index 0000000..e69de29