From 41f0cc8a41b3ff8e4f9cedc284c12851e23d6b44 Mon Sep 17 00:00:00 2001
From: Matthias Bisping <matthias.bisping@iqser.com>
Date: Fri, 25 Mar 2022 11:42:31 +0100
Subject: [PATCH] estimator + model label mapping

---
 config.yaml                                   |  4 +-
 image_prediction/flask.py                     |  2 +-
 image_prediction/model/mock.py                |  6 ---
 image_prediction/model/model.py               | 13 ------
 image_prediction/predictor.py                 |  6 +--
 .../{model => service_estimator}/__init__.py  |  0
 image_prediction/service_estimator/mock.py    |  6 +++
 .../service_estimator/service_estimator.py    | 14 +++++++
 scripts/keras_MnWE.py                         |  4 +-
 src/serve.py                                  |  2 +-
 test/unit_tests/model_test.py                 | 36 ----------------
 test/unit_tests/service_estimator_test.py     | 41 +++++++++++++++++++
 12 files changed, 70 insertions(+), 64 deletions(-)
 delete mode 100644 image_prediction/model/mock.py
 delete mode 100644 image_prediction/model/model.py
 rename image_prediction/{model => service_estimator}/__init__.py (100%)
 create mode 100644 image_prediction/service_estimator/mock.py
 create mode 100644 image_prediction/service_estimator/service_estimator.py
 delete mode 100644 test/unit_tests/model_test.py
 create mode 100644 test/unit_tests/service_estimator_test.py

diff --git a/config.yaml b/config.yaml
index dbc5b87..c3c8319 100644
--- a/config.yaml
+++ b/config.yaml
@@ -8,10 +8,10 @@ service:
   progressbar: True  # Whether a progress bar over the pages of a document is displayed while processing
   batch_size: $BATCH_SIZE|32  # Number of images in memory simultaneously
   verbose: $VERBOSE|True  # Service prints document processing progress to stdout
-  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the model from
+  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the model from
 
 
-# These variables control filters that are applied to either images, image metadata or model predictions. The filter
+# These variables control filters that are applied to either images, image metadata or model predictions. The filter
 # result values are reported in the service responses. For convenience the response to a request contains a
 # "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified
 # required value.
diff --git a/image_prediction/flask.py b/image_prediction/flask.py
index 5cf40c2..33c3cf3 100644
--- a/image_prediction/flask.py
+++ b/image_prediction/flask.py
@@ -30,7 +30,7 @@ def make_prediction_server(predict_fn: Callable):
             return_dict["result"] = predict_fn(pdf)
 
         def process():
-            # Tensorflow does not free RAM. Workaround is running model in process.
+            # TensorFlow does not free RAM. The workaround is to run the model in a separate process.
             # https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
             pdf = request.data
             manager = multiprocessing.Manager()
diff --git a/image_prediction/model/mock.py b/image_prediction/model/mock.py
deleted file mode 100644
index f52722c..0000000
--- a/image_prediction/model/mock.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from image_prediction.model.model import Model
-
-
-class ModelMock(Model):
-    def __init__(self, estimator):
-        super().__init__(estimator=estimator)
diff --git a/image_prediction/model/model.py b/image_prediction/model/model.py
deleted file mode 100644
index 72119f4..0000000
--- a/image_prediction/model/model.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import abc
-
-
-class Model(abc.ABC):
-    def __init__(self, estimator):
-        self.__estimator = estimator
-
-    @property
-    def estimator(self):
-        return self.__estimator
-
-    def predict(self, batch):
-        return self.estimator.predict(batch)
diff --git a/image_prediction/predictor.py b/image_prediction/predictor.py
index 8e83f2c..2dad683 100644
--- a/image_prediction/predictor.py
+++ b/image_prediction/predictor.py
@@ -16,15 +16,15 @@ logger = get_logger()
 
 
 class Predictor:
-    """`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
-    interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
+    """`ModelHandle` wrapper. Forwards to the wrapped model handle for prediction and produces structured output that is
+    interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
     """
 
     def __init__(self, model_handle: ModelHandle = None):
         """Initializes a ServiceEstimator.
 
         Args:
-            model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
+            model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
                 mlflow database via CONFIG.service.run_id.
         """
         try:
diff --git a/image_prediction/model/__init__.py b/image_prediction/service_estimator/__init__.py
similarity index 100%
rename from image_prediction/model/__init__.py
rename to image_prediction/service_estimator/__init__.py
diff --git a/image_prediction/service_estimator/mock.py b/image_prediction/service_estimator/mock.py
new file mode 100644
index 0000000..815a2f1
--- /dev/null
+++ b/image_prediction/service_estimator/mock.py
@@ -0,0 +1,8 @@
+from image_prediction.service_estimator.service_estimator import ServiceEstimator
+
+
+class ServiceEstimatorMock(ServiceEstimator):
+    """Test double for `ServiceEstimator`; forwards both constructor arguments to the base class unchanged."""
+
+    def __init__(self, estimator, classes):
+        super().__init__(estimator=estimator, classes=classes)
diff --git a/image_prediction/service_estimator/service_estimator.py b/image_prediction/service_estimator/service_estimator.py
new file mode 100644
index 0000000..1c2b2fe
--- /dev/null
+++ b/image_prediction/service_estimator/service_estimator.py
@@ -0,0 +1,31 @@
+import abc
+
+
+class ServiceEstimator(abc.ABC):
+    """Wraps an estimator and translates its numeric predictions into class labels.
+
+    Args:
+        estimator: Object exposing a `predict(batch)` method; its return value is iterated and each item is used
+            as an index into `classes`.
+        classes: Indexable collection mapping a numeric label to the class label reported to callers.
+    """
+
+    def __init__(self, estimator, classes):
+        self.__estimator = estimator
+        self.__classes = classes
+
+    @property
+    def estimator(self):
+        """The wrapped estimator, exposed read-only."""
+        return self.__estimator
+
+    def predict(self, batch):
+        """Predicts class labels for a batch.
+
+        Args:
+            batch: Input forwarded unchanged to the wrapped estimator's `predict`.
+
+        Returns:
+            A list with one entry of `classes` per numeric label returned by the wrapped estimator.
+        """
+        return [self.__classes[numeric_label] for numeric_label in self.estimator.predict(batch)]
diff --git a/scripts/keras_MnWE.py b/scripts/keras_MnWE.py
index 05a45dd..0788e43 100644
--- a/scripts/keras_MnWE.py
+++ b/scripts/keras_MnWE.py
@@ -26,7 +26,7 @@ def make_model():
     dense = layers.Dense(64, activation="relu")
     x = dense(inputs)
     outputs = layers.Dense(10)(x)
-    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
+    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
     model.compile(
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
         optimizer=keras.optimizers.RMSprop(),
@@ -40,7 +40,7 @@ def make_predict_fn():
     model = make_model()
 
     def predict(*args):
-        # model = make_model()
+        # model = make_model()
         return model.predict(np.random.random(size=(1, 784)))
 
     return predict
diff --git a/src/serve.py b/src/serve.py
index 666ca80..989a0da 100644
--- a/src/serve.py
+++ b/src/serve.py
@@ -13,7 +13,7 @@ logger = get_logger()
 
 def main():
     def predict(pdf):
-        # Keras model.predict stalls when model was loaded in different process
+        # Keras model.predict stalls when the model was loaded in a different process
         # https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python
         predictor = Predictor()
         predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar)
diff --git a/test/unit_tests/model_test.py b/test/unit_tests/model_test.py
deleted file mode 100644
index 2104a97..0000000
--- a/test/unit_tests/model_test.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import numpy as np
-import pytest
-
-from image_prediction.estimator.mock import EstimatorMock
-from image_prediction.model.mock import ModelMock
-
-
-@pytest.fixture(scope="session")
-def estimator():
-    return EstimatorMock()
-
-
-@pytest.fixture(scope="session")
-def batches(batch_size):
-    input_batch = np.random.normal(size=(batch_size, 10, 15))
-    output_batch = np.random.randint(low=42, high=43, size=(batch_size, 10, 15))
-    return input_batch, output_batch
-
-
-@pytest.fixture(scope="session")
-def classes():
-    return ["A", "B", "C"]
-
-
-@pytest.fixture(scope="session")
-def model(model_type, estimator):
-    if model_type == "mock":
-        return ModelMock(estimator)
-
-
-@pytest.mark.parametrize("model_type", ["mock"], scope="session")
-@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")
-def test_predict(model, batches):
-    input_batch, output_batch = batches
-    model.estimator.output_batch = output_batch
-    assert np.all(np.equal(model.predict(input_batch), output_batch))
diff --git a/test/unit_tests/service_estimator_test.py b/test/unit_tests/service_estimator_test.py
new file mode 100644
index 0000000..b0ffc1c
--- /dev/null
+++ b/test/unit_tests/service_estimator_test.py
@@ -0,0 +1,49 @@
+import numpy as np
+import pytest
+
+from image_prediction.estimator.mock import EstimatorMock
+from image_prediction.service_estimator.mock import ServiceEstimatorMock
+
+
+@pytest.fixture(scope="session")
+def estimator():
+    """Mock estimator shared across the session; the test sets its `output_batch` attribute."""
+    return EstimatorMock()
+
+
+@pytest.fixture(scope="session")
+def batches(batch_size, classes):
+    """Random input batch plus random numeric labels, each a valid index into `classes`."""
+    input_batch = np.random.normal(size=(batch_size, 10, 15))
+    # `high` is exclusive, so every label lies in [0, len(classes)).
+    output_batch = np.random.randint(low=0, high=len(classes), size=batch_size)
+    return input_batch, output_batch
+
+
+@pytest.fixture(scope="session")
+def classes():
+    """Class labels that numeric predictions are mapped onto."""
+    return ["A", "B", "C"]
+
+
+def map_labels(numeric_labels, classes):
+    """Reference mapping: looks up each numeric label in `classes`."""
+    return [classes[nl] for nl in numeric_labels]
+
+
+@pytest.fixture(scope="session")
+def service_estimator(model_type, estimator, classes):
+    """Service estimator under test; returns None implicitly for any `model_type` other than "mock"."""
+    if model_type == "mock":
+        return ServiceEstimatorMock(estimator, classes)
+
+
+@pytest.mark.parametrize("model_type", ["mock"], scope="session")
+@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")
+def test_predict(service_estimator, batches, classes):
+    """Checks that `predict` returns the class labels matching the estimator's numeric output."""
+    input_batch, output_batch = batches
+    # NOTE(review): presumably EstimatorMock.predict returns `output_batch` — confirm in estimator/mock.py.
+    service_estimator.estimator.output_batch = output_batch
+    expected_predictions = map_labels(output_batch, classes)
+    assert service_estimator.predict(input_batch) == expected_predictions