estimator + model label mapping

2022-03-25 11:42:31 +01:00 · 2022-03-25 11:42:31 +01:00 · 41f0cc8a41
commit 41f0cc8a41
parent ee959346b7
12 changed files with 70 additions and 64 deletions
--- a/config.yaml
+++ b/config.yaml
@ -8,10 +8,10 @@ service:
  progressbar: True  # Whether a progress bar over the pages of a document is displayed while processing
  batch_size: $BATCH_SIZE|32  # Number of images in memory simultaneously
  verbose: $VERBOSE|True  # Service prints document processing progress to stdout
-  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the model from
+  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the service_estimator from
-# These variables control filters that are applied to either images, image metadata or model predictions. The filter
+# These variables control filters that are applied to either images, image metadata or service_estimator predictions. The filter
 # result values are reported in the service responses. For convenience the response to a request contains a
 # "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified
 # required value.
--- a/image_prediction/flask.py
+++ b/image_prediction/flask.py
@ -30,7 +30,7 @@ def make_prediction_server(predict_fn: Callable):
            return_dict["result"] = predict_fn(pdf)
        def process():
-            # Tensorflow does not free RAM. Workaround is running model in process.
+            # Tensorflow does not free RAM. Workaround is running the model in a separate process.
            # https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
            pdf = request.data
            manager = multiprocessing.Manager()
--- a/image_prediction/model/mock.py
+++ b/image_prediction/model/mock.py
@ -1,6 +0,0 @@
 from image_prediction.model.model import Model
 class ModelMock(Model):
    """Mock `Model` that adds no behaviour beyond wrapping the given estimator."""
    def __init__(self, estimator):
        # Forward the estimator unchanged to the base class initializer.
        super().__init__(estimator)
--- a/image_prediction/model/model.py
+++ b/image_prediction/model/model.py
@ -1,13 +0,0 @@
 import abc
 class Model(abc.ABC):
    """Thin wrapper around an estimator that forwards prediction calls to it."""
    def __init__(self, estimator):
        """Store the estimator to delegate to.
        Args:
            estimator: Object whose `predict(batch)` method is called by `predict`.
        """
        self.__estimator = estimator
    @property
    def estimator(self):
        """The wrapped estimator (exposed read-only; no setter is defined)."""
        return self.__estimator
    def predict(self, batch):
        """Return whatever the wrapped estimator predicts for `batch`."""
        wrapped = self.estimator
        return wrapped.predict(batch)
--- a/image_prediction/predictor.py
+++ b/image_prediction/predictor.py
@ -16,15 +16,15 @@ logger = get_logger()
 class Predictor:
-    """`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
+    """`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
-    interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
+    interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
    """
    def __init__(self, model_handle: ModelHandle = None):
        """Initializes a ServiceEstimator.
        Args:
-            model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
+            model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
                mlflow database via CONFIG.service.run_id.
        """
        try:
--- a/image_prediction/service_estimator/init.py
+++ b/image_prediction/service_estimator/init.py
--- a/image_prediction/service_estimator/mock.py
+++ b/image_prediction/service_estimator/mock.py
@ -0,0 +1,6 @@
 from image_prediction.service_estimator.service_estimator import ServiceEstimator
 class ServiceEstimatorMock(ServiceEstimator):
    """Mock `ServiceEstimator` that adds no behaviour of its own."""
    def __init__(self, estimator, classes):
        # Forward both arguments unchanged to the base class initializer.
        super().__init__(estimator, classes)
--- a/image_prediction/service_estimator/service_estimator.py
+++ b/image_prediction/service_estimator/service_estimator.py
@ -0,0 +1,14 @@
 import abc
 class ServiceEstimator(abc.ABC):
    """Wraps an estimator and translates its numeric predictions into class labels."""
    def __init__(self, estimator, classes):
        """Store the estimator and the label lookup.
        Args:
            estimator: Object whose `predict(batch)` yields numeric labels.
            classes: Container indexed by numeric label to obtain the class label.
        """
        self.__classes = classes
        self.__estimator = estimator
    @property
    def estimator(self):
        """The wrapped estimator (exposed read-only; no setter is defined)."""
        return self.__estimator
    def predict(self, batch):
        """Return a list with the class label for each numeric label predicted for `batch`."""
        numeric_labels = self.estimator.predict(batch)
        return [self.__classes[label] for label in numeric_labels]
--- a/scripts/keras_MnWE.py
+++ b/scripts/keras_MnWE.py
@ -26,7 +26,7 @@ def make_model():
    dense = layers.Dense(64, activation="relu")
    x = dense(inputs)
    outputs = layers.Dense(10)(x)
-    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
+    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=keras.optimizers.RMSprop(),
@ -40,7 +40,7 @@ def make_predict_fn():
    model = make_model()
    def predict(*args):
-        # model = make_model()
+        # model = make_model()
        return model.predict(np.random.random(size=(1, 784)))
    return predict
--- a/src/serve.py
+++ b/src/serve.py
@ -13,7 +13,7 @@ logger = get_logger()
 def main():
    def predict(pdf):
-        # Keras model.predict stalls when model was loaded in different process
+        # Keras model.predict stalls when the model was loaded in a different process
        # https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python
        predictor = Predictor()
        predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar)
--- a/test/unit_tests/model_test.py
+++ b/test/unit_tests/model_test.py
@ -1,36 +0,0 @@
 import numpy as np
 import pytest
 from image_prediction.estimator.mock import EstimatorMock
 from image_prediction.model.mock import ModelMock
@pytest.fixture(scope="session")
 def estimator():
    return EstimatorMock()
@pytest.fixture(scope="session")
 def batches(batch_size):
    input_batch = np.random.normal(size=(batch_size, 10, 15))
    output_batch = np.random.randint(low=42, high=43, size=(batch_size, 10, 15))
    return input_batch, output_batch
@pytest.fixture(scope="session")
 def classes():
    return ["A", "B", "C"]
@pytest.fixture(scope="session")
 def model(model_type, estimator):
    if model_type == "mock":
        return ModelMock(estimator)
@pytest.mark.parametrize("model_type", ["mock"], scope="session")
@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")
 def test_predict(model, batches):
    input_batch, output_batch = batches
    model.estimator.output_batch = output_batch
    assert np.all(np.equal(model.predict(input_batch), output_batch))
--- a/test/unit_tests/service_estimator_test.py
+++ b/test/unit_tests/service_estimator_test.py
@ -0,0 +1,41 @@
 import numpy as np
 import pytest
 from image_prediction.estimator.mock import EstimatorMock
 from image_prediction.service_estimator.mock import ServiceEstimatorMock
@pytest.fixture(scope="session")
 def estimator():
    return EstimatorMock()
@pytest.fixture(scope="session")
 def batches(batch_size, classes):
    input_batch = np.random.normal(size=(batch_size, 10, 15))
    output_batch = np.random.randint(low=0, high=len(classes), size=batch_size)
    return input_batch, output_batch
@pytest.fixture(scope="session")
 def classes():
    return ["A", "B", "C"]
 def map_labels(numeric_labels, classes):
    """Translate each numeric label into the entry of `classes` at that index."""
    labels = []
    for numeric_label in numeric_labels:
        labels.append(classes[numeric_label])
    return labels
@pytest.fixture(scope="session")
 def service_estimator(model_type, estimator, classes):
    if model_type == "mock":
        return ServiceEstimatorMock(estimator, classes)
@pytest.mark.parametrize("model_type", ["mock"], scope="session")
@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")
 def test_predict(service_estimator, batches, classes):
    input_batch, output_batch = batches
    service_estimator.estimator.output_batch = output_batch
    expected_predictions = map_labels(output_batch, classes)
    assert service_estimator.predict(input_batch) == expected_predictions