From 41f0cc8a41b3ff8e4f9cedc284c12851e23d6b44 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Fri, 25 Mar 2022 11:42:31 +0100 Subject: [PATCH] estimator + model label mapping --- config.yaml | 4 +- image_prediction/flask.py | 2 +- image_prediction/model/mock.py | 6 --- image_prediction/model/model.py | 13 ------ image_prediction/predictor.py | 6 +-- .../{model => service_estimator}/__init__.py | 0 image_prediction/service_estimator/mock.py | 6 +++ .../service_estimator/service_estimator.py | 14 +++++++ scripts/keras_MnWE.py | 4 +- src/serve.py | 2 +- test/unit_tests/model_test.py | 36 ---------------- test/unit_tests/service_estimator_test.py | 41 +++++++++++++++++++ 12 files changed, 70 insertions(+), 64 deletions(-) delete mode 100644 image_prediction/model/mock.py delete mode 100644 image_prediction/model/model.py rename image_prediction/{model => service_estimator}/__init__.py (100%) create mode 100644 image_prediction/service_estimator/mock.py create mode 100644 image_prediction/service_estimator/service_estimator.py delete mode 100644 test/unit_tests/model_test.py create mode 100644 test/unit_tests/service_estimator_test.py diff --git a/config.yaml b/config.yaml index dbc5b87..c3c8319 100644 --- a/config.yaml +++ b/config.yaml @@ -8,10 +8,10 @@ service: progressbar: True # Whether a progress bar over the pages of a document is displayed while processing batch_size: $BATCH_SIZE|32 # Number of images in memory simultaneously verbose: $VERBOSE|True # Service prints document processing progress to stdout - run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from + run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from -# These variables control filters that are applied to either images, image metadata or model predictions. The filter +# These variables control filters that are applied to either images, image metadata or service_estimator predictions. 
The filter # result values are reported in the service responses. For convenience the response to a request contains a # "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified # required value. diff --git a/image_prediction/flask.py b/image_prediction/flask.py index 5cf40c2..33c3cf3 100644 --- a/image_prediction/flask.py +++ b/image_prediction/flask.py @@ -30,7 +30,7 @@ def make_prediction_server(predict_fn: Callable): return_dict["result"] = predict_fn(pdf) def process(): - # Tensorflow does not free RAM. Workaround is running model in process. + # Tensorflow does not free RAM. Workaround is running the model in a separate process. # https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution pdf = request.data manager = multiprocessing.Manager() diff --git a/image_prediction/model/mock.py b/image_prediction/model/mock.py deleted file mode 100644 index f52722c..0000000 --- a/image_prediction/model/mock.py +++ /dev/null @@ -1,6 +0,0 @@ -from image_prediction.model.model import Model - - -class ModelMock(Model): - def __init__(self, estimator): - super().__init__(estimator=estimator) diff --git a/image_prediction/model/model.py b/image_prediction/model/model.py deleted file mode 100644 index 72119f4..0000000 --- a/image_prediction/model/model.py +++ /dev/null @@ -1,13 +0,0 @@ -import abc - - -class Model(abc.ABC): - def __init__(self, estimator): - self.__estimator = estimator - - @property - def estimator(self): - return self.__estimator - - def predict(self, batch): - return self.estimator.predict(batch) diff --git a/image_prediction/predictor.py b/image_prediction/predictor.py index 8e83f2c..2dad683 100644 --- a/image_prediction/predictor.py +++ b/image_prediction/predictor.py @@ -16,15 +16,15 @@ logger = get_logger() class Predictor: - """`ModelHandle` wrapper. 
Forwards to wrapped model handle for prediction and produces structured output that is - interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute). + """`ModelHandle` wrapper. Forwards to the wrapped model handle for prediction and produces structured output that is + interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute). """ def __init__(self, model_handle: ModelHandle = None): """Initializes a ServiceEstimator. Args: - model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the + model_handle: ModelHandle object to forward to for prediction. By default, the model handle is loaded from the mlflow database via CONFIG.service.run_id. """ try: diff --git a/image_prediction/model/__init__.py b/image_prediction/service_estimator/__init__.py similarity index 100% rename from image_prediction/model/__init__.py rename to image_prediction/service_estimator/__init__.py diff --git a/image_prediction/service_estimator/mock.py b/image_prediction/service_estimator/mock.py new file mode 100644 index 0000000..815a2f1 --- /dev/null +++ b/image_prediction/service_estimator/mock.py @@ -0,0 +1,6 @@ +from image_prediction.service_estimator.service_estimator import ServiceEstimator + + +class ServiceEstimatorMock(ServiceEstimator): + def __init__(self, estimator, classes): + super().__init__(estimator=estimator, classes=classes) diff --git a/image_prediction/service_estimator/service_estimator.py b/image_prediction/service_estimator/service_estimator.py new file mode 100644 index 0000000..1c2b2fe --- /dev/null +++ b/image_prediction/service_estimator/service_estimator.py @@ -0,0 +1,14 @@ +import abc + + +class ServiceEstimator(abc.ABC): + def __init__(self, estimator, classes): + self.__estimator = estimator + self.__classes = classes + + @property + def estimator(self): + return self.__estimator + + def predict(self, batch): + return 
[self.__classes[numeric_label] for numeric_label in self.estimator.predict(batch)] diff --git a/scripts/keras_MnWE.py b/scripts/keras_MnWE.py index 05a45dd..0788e43 100644 --- a/scripts/keras_MnWE.py +++ b/scripts/keras_MnWE.py @@ -26,7 +26,7 @@ def make_model(): dense = layers.Dense(64, activation="relu") x = dense(inputs) outputs = layers.Dense(10)(x) - model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model") + model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model") model.compile( loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=keras.optimizers.RMSprop(), @@ -40,7 +40,7 @@ def make_predict_fn(): model = make_model() def predict(*args): - # model = make_model() + # model = make_model() return model.predict(np.random.random(size=(1, 784))) return predict diff --git a/src/serve.py b/src/serve.py index 666ca80..989a0da 100644 --- a/src/serve.py +++ b/src/serve.py @@ -13,7 +13,7 @@ logger = get_logger() def main(): def predict(pdf): - # Keras model.predict stalls when model was loaded in different process + # Keras model.predict stalls when the model was loaded in a different process # https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python predictor = Predictor() predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar) diff --git a/test/unit_tests/model_test.py b/test/unit_tests/model_test.py deleted file mode 100644 index 2104a97..0000000 --- a/test/unit_tests/model_test.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -import pytest - -from image_prediction.estimator.mock import EstimatorMock -from image_prediction.model.mock import ModelMock - - -@pytest.fixture(scope="session") -def estimator(): - return EstimatorMock() - - -@pytest.fixture(scope="session") -def batches(batch_size): - input_batch = np.random.normal(size=(batch_size, 10, 15)) - output_batch = np.random.randint(low=42, high=43, 
size=(batch_size, 10, 15)) - return input_batch, output_batch - - -@pytest.fixture(scope="session") -def classes(): - return ["A", "B", "C"] - - -@pytest.fixture(scope="session") -def model(model_type, estimator): - if model_type == "mock": - return ModelMock(estimator) - - -@pytest.mark.parametrize("model_type", ["mock"], scope="session") -@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session") -def test_predict(model, batches): - input_batch, output_batch = batches - model.estimator.output_batch = output_batch - assert np.all(np.equal(model.predict(input_batch), output_batch)) diff --git a/test/unit_tests/service_estimator_test.py b/test/unit_tests/service_estimator_test.py new file mode 100644 index 0000000..b0ffc1c --- /dev/null +++ b/test/unit_tests/service_estimator_test.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest + +from image_prediction.estimator.mock import EstimatorMock +from image_prediction.service_estimator.mock import ServiceEstimatorMock + + +@pytest.fixture(scope="session") +def estimator(): + return EstimatorMock() + + +@pytest.fixture(scope="session") +def batches(batch_size, classes): + input_batch = np.random.normal(size=(batch_size, 10, 15)) + output_batch = np.random.randint(low=0, high=len(classes), size=batch_size) + return input_batch, output_batch + + +@pytest.fixture(scope="session") +def classes(): + return ["A", "B", "C"] + + +def map_labels(numeric_labels, classes): + return [classes[nl] for nl in numeric_labels] + + +@pytest.fixture(scope="session") +def service_estimator(model_type, estimator, classes): + if model_type == "mock": + return ServiceEstimatorMock(estimator, classes) + + +@pytest.mark.parametrize("model_type", ["mock"], scope="session") +@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session") +def test_predict(service_estimator, batches, classes): + input_batch, output_batch = batches + service_estimator.estimator.output_batch = output_batch + expected_predictions = 
map_labels(output_batch, classes) + assert service_estimator.predict(input_batch) == expected_predictions