estimator + model label mapping

2022-03-25 11:42:31 +01:00 · 2022-03-25 11:42:31 +01:00 · 41f0cc8a41
commit 41f0cc8a41
parent ee959346b7
12 changed files with 70 additions and 64 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -8,10 +8,10 @@ service:
  progressbar: True  # Whether a progress bar over the pages of a document is displayed while processing
  batch_size: $BATCH_SIZE|32  # Number of images in memory simultaneously
  verbose: $VERBOSE|True  # Service prints document processing progress to stdout
-  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the model from
+  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the service_estimator from


-# These variables control filters that are applied to either images, image metadata or model predictions. The filter
+# These variables control filters that are applied to either images, image metadata or service_estimator predictions. The filter
 # result values are reported in the service responses. For convenience the response to a request contains a
 # "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified
 # required value.
--- a/image_prediction/flask.py
+++ b/image_prediction/flask.py
@@ -30,7 +30,7 @@ def make_prediction_server(predict_fn: Callable):
            return_dict["result"] = predict_fn(pdf)

        def process():
-            # Tensorflow does not free RAM. Workaround is running model in process.
+            # Tensorflow does not free RAM. The workaround is to run the service_estimator in a separate process.
            # https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
            pdf = request.data
            manager = multiprocessing.Manager()
--- a/image_prediction/model/mock.py
+++ b/image_prediction/model/mock.py
@@ -1,6 +0,0 @@
-from image_prediction.model.model import Model
-
-
-class ModelMock(Model):
-    def __init__(self, estimator):
-        super().__init__(estimator=estimator)
--- a/image_prediction/model/model.py
+++ b/image_prediction/model/model.py
@@ -1,13 +0,0 @@
-import abc
-
-
-class Model(abc.ABC):
-    def __init__(self, estimator):
-        self.__estimator = estimator
-
-    @property
-    def estimator(self):
-        return self.__estimator
-
-    def predict(self, batch):
-        return self.estimator.predict(batch)
--- a/image_prediction/predictor.py
+++ b/image_prediction/predictor.py
@@ -16,15 +16,15 @@ logger = get_logger()


 class Predictor:
-    """`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
-    interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
+    """`ModelHandle` wrapper. Forwards to the wrapped model handle for prediction and produces structured output that is
+    interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
    """

    def __init__(self, model_handle: ModelHandle = None):
        """Initializes a ServiceEstimator.

        Args:
-            model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
+            model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
                mlflow database via CONFIG.service.run_id.
        """
        try:
--- a/image_prediction/service_estimator/__init__.py
+++ b/image_prediction/service_estimator/__init__.py
--- a/image_prediction/service_estimator/mock.py
+++ b/image_prediction/service_estimator/mock.py
@@ -0,0 +1,6 @@
+from image_prediction.service_estimator.service_estimator import ServiceEstimator
+
+
+class ServiceEstimatorMock(ServiceEstimator):  # Mock subclass: adds no behavior of its own on top of ServiceEstimator.
+    def __init__(self, estimator, classes):
+        super().__init__(estimator=estimator, classes=classes)  # forwards both arguments unchanged to the base class
--- a/image_prediction/service_estimator/service_estimator.py
+++ b/image_prediction/service_estimator/service_estimator.py
@@ -0,0 +1,14 @@
+import abc
+
+
+class ServiceEstimator(abc.ABC):  # Pairs an estimator with the class labels its numeric predictions index into. NOTE(review): no abstract methods are declared.
+    def __init__(self, estimator, classes):
+        self.__estimator = estimator  # object whose predict(batch) is called by `predict` below
+        self.__classes = classes  # indexed by each numeric prediction in `predict`; not exposed through a property
+
+    @property
+    def estimator(self):  # read-only access to the wrapped estimator (no setter is defined)
+        return self.__estimator
+
+    def predict(self, batch):  # returns a list holding one entry of `classes` per value produced by estimator.predict(batch)
+        return [self.__classes[numeric_label] for numeric_label in self.estimator.predict(batch)]
--- a/scripts/keras_MnWE.py
+++ b/scripts/keras_MnWE.py
@@ -26,7 +26,7 @@ def make_model():
    dense = layers.Dense(64, activation="relu")
    x = dense(inputs)
    outputs = layers.Dense(10)(x)
-    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
+    model = keras.ServiceEstimator(inputs=inputs, outputs=outputs, name="mnist_model")
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=keras.optimizers.RMSprop(),
@@ -40,7 +40,7 @@ def make_predict_fn():
    model = make_model()

    def predict(*args):
-        # model = make_model()
+        # model = make_model()
        return model.predict(np.random.random(size=(1, 784)))

    return predict
--- a/src/serve.py
+++ b/src/serve.py
@@ -13,7 +13,7 @@ logger = get_logger()

 def main():
    def predict(pdf):
-        # Keras model.predict stalls when model was loaded in different process
+        # Keras model.predict stalls when the model was loaded in a different process
        # https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python
        predictor = Predictor()
        predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar)
--- a/test/unit_tests/model_test.py
+++ b/test/unit_tests/model_test.py
@@ -1,36 +0,0 @@
-import numpy as np
-import pytest
-
-from image_prediction.estimator.mock import EstimatorMock
-from image_prediction.model.mock import ModelMock
-
-
-@pytest.fixture(scope="session")
-def estimator():
-    return EstimatorMock()
-
-
-@pytest.fixture(scope="session")
-def batches(batch_size):
-    input_batch = np.random.normal(size=(batch_size, 10, 15))
-    output_batch = np.random.randint(low=42, high=43, size=(batch_size, 10, 15))
-    return input_batch, output_batch
-
-
-@pytest.fixture(scope="session")
-def classes():
-    return ["A", "B", "C"]
-
-
-@pytest.fixture(scope="session")
-def model(model_type, estimator):
-    if model_type == "mock":
-        return ModelMock(estimator)
-
-
-@pytest.mark.parametrize("model_type", ["mock"], scope="session")
-@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")
-def test_predict(model, batches):
-    input_batch, output_batch = batches
-    model.estimator.output_batch = output_batch
-    assert np.all(np.equal(model.predict(input_batch), output_batch))
--- a/test/unit_tests/service_estimator_test.py
+++ b/test/unit_tests/service_estimator_test.py
@@ -0,0 +1,41 @@
+import numpy as np
+import pytest
+
+from image_prediction.estimator.mock import EstimatorMock
+from image_prediction.service_estimator.mock import ServiceEstimatorMock
+
+
+@pytest.fixture(scope="session")
+def estimator():  # session-scoped EstimatorMock handed to the service estimator under test
+    return EstimatorMock()
+
+
+@pytest.fixture(scope="session")
+def batches(batch_size, classes):  # builds an (input_batch, output_batch) pair for the parametrized batch_size
+    input_batch = np.random.normal(size=(batch_size, 10, 15))  # random float array of shape (batch_size, 10, 15)
+    output_batch = np.random.randint(low=0, high=len(classes), size=batch_size)  # batch_size random valid indices into `classes`
+    return input_batch, output_batch
+
+
+@pytest.fixture(scope="session")
+def classes():  # label names that numeric predictions are mapped onto by position
+    return ["A", "B", "C"]
+
+
+def map_labels(numeric_labels, classes):  # reference mapping used to compute the expected labels in test_predict
+    return [classes[nl] for nl in numeric_labels]
+
+
+@pytest.fixture(scope="session")
+def service_estimator(model_type, estimator, classes):  # builds the service estimator under test for the parametrized model_type
+    if model_type == "mock":  # only "mock" is handled; any other value falls through and the fixture yields None
+        return ServiceEstimatorMock(estimator, classes)
+
+
+@pytest.mark.parametrize("model_type", ["mock"], scope="session")
+@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")  # includes the empty batch (0)
+def test_predict(service_estimator, batches, classes):  # predict() must return the class label for every numeric prediction
+    input_batch, output_batch = batches
+    service_estimator.estimator.output_batch = output_batch  # presumably what EstimatorMock.predict returns; verify against EstimatorMock
+    expected_predictions = map_labels(output_batch, classes)
+    assert service_estimator.predict(input_batch) == expected_predictions