estimator + model label mapping

This commit is contained in:
Matthias Bisping 2022-03-25 11:42:31 +01:00
parent ee959346b7
commit 41f0cc8a41
12 changed files with 70 additions and 64 deletions

View File

@ -8,10 +8,10 @@ service:
progressbar: True # Whether a progress bar over the pages of a document is displayed while processing progressbar: True # Whether a progress bar over the pages of a document is displayed while processing
batch_size: $BATCH_SIZE|32 # Number of images in memory simultaneously batch_size: $BATCH_SIZE|32 # Number of images in memory simultaneously
verbose: $VERBOSE|True # Service prints document processing progress to stdout verbose: $VERBOSE|True # Service prints document processing progress to stdout
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
# These variables control filters that are applied to either images, image metadata or model predictions. The filter # These variables control filters that are applied to either images, image metadata or service_estimator predictions. The filter
# result values are reported in the service responses. For convenience the response to a request contains a # result values are reported in the service responses. For convenience the response to a request contains a
# "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified # "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified
# required value. # required value.

View File

@ -30,7 +30,7 @@ def make_prediction_server(predict_fn: Callable):
return_dict["result"] = predict_fn(pdf) return_dict["result"] = predict_fn(pdf)
def process(): def process():
# Tensorflow does not free RAM. Workaround is running model in process. # Tensorflow does not free RAM. Workaround is running service_estimator in process.
# https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution # https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
pdf = request.data pdf = request.data
manager = multiprocessing.Manager() manager = multiprocessing.Manager()

View File

@ -1,6 +0,0 @@
from image_prediction.model.model import Model
class ModelMock(Model):
    """Minimal concrete `Model` that adds no behavior of its own."""

    def __init__(self, estimator):
        """Pass the estimator straight through to `Model`.

        Args:
            estimator: Estimator object to wrap.
        """
        super(ModelMock, self).__init__(estimator=estimator)

View File

@ -1,13 +0,0 @@
import abc
class Model(abc.ABC):
    """Wraps an estimator and forwards prediction requests to it."""

    def __init__(self, estimator):
        """Store the estimator that predictions are delegated to.

        Args:
            estimator: Object exposing a `predict(batch)` method.
        """
        self.__estimator = estimator

    @property
    def estimator(self):
        """The wrapped estimator (read-only access)."""
        return self.__estimator

    def predict(self, batch):
        """Return the wrapped estimator's prediction for `batch`, unchanged.

        Args:
            batch: Input handed to the estimator as is.
        """
        wrapped = self.estimator
        return wrapped.predict(batch)

View File

@ -16,15 +16,15 @@ logger = get_logger()
class Predictor: class Predictor:
"""`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is """`ModelHandle` wrapper. Forwards to wrapped service_estimator handle for prediction and produces structured output that is
interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute). interpretable independently of the wrapped service_estimator (e.g. with regard to a .classes_ attribute).
""" """
def __init__(self, model_handle: ModelHandle = None): def __init__(self, model_handle: ModelHandle = None):
"""Initializes a Predictor. """Initializes a Predictor.
Args: Args:
model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the model_handle: ModelHandle object to forward to for prediction. By default, a service_estimator handle is loaded from the
mlflow database via CONFIG.service.run_id. mlflow database via CONFIG.service.run_id.
""" """
try: try:

View File

@ -0,0 +1,6 @@
from image_prediction.service_estimator.service_estimator import ServiceEstimator
class ServiceEstimatorMock(ServiceEstimator):
    """Minimal concrete `ServiceEstimator` that adds no behavior of its own."""

    def __init__(self, estimator, classes):
        """Pass both arguments straight through to `ServiceEstimator`.

        Args:
            estimator: Estimator object to wrap.
            classes: Label lookup indexed by the estimator's numeric output.
        """
        super(ServiceEstimatorMock, self).__init__(estimator=estimator, classes=classes)

View File

@ -0,0 +1,14 @@
import abc
class ServiceEstimator(abc.ABC):
    """Wraps an estimator and translates its numeric labels into class labels."""

    def __init__(self, estimator, classes):
        """Store the estimator and the label lookup.

        Args:
            estimator: Object exposing a `predict(batch)` method.
            classes: Indexable container; `classes[numeric_label]` is the
                label reported for a numeric prediction.
        """
        self.__estimator = estimator
        self.__classes = classes

    @property
    def estimator(self):
        """The wrapped estimator (read-only access)."""
        return self.__estimator

    def predict(self, batch):
        """Predict on `batch` and map every numeric label to its class label.

        Args:
            batch: Input handed to the wrapped estimator as is.

        Returns:
            A list with one entry of `classes` per value the estimator returned.
        """
        numeric_labels = self.estimator.predict(batch)
        labels = []
        for numeric_label in numeric_labels:
            labels.append(self.__classes[numeric_label])
        return labels

View File

@ -26,7 +26,7 @@ def make_model():
dense = layers.Dense(64, activation="relu") dense = layers.Dense(64, activation="relu")
x = dense(inputs) x = dense(inputs)
outputs = layers.Dense(10)(x) outputs = layers.Dense(10)(x)
model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model") model = keras.ServiceEstimator(inputs=inputs, outputs=outputs, name="mnist_model")
model.compile( model.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
optimizer=keras.optimizers.RMSprop(), optimizer=keras.optimizers.RMSprop(),
@ -40,7 +40,7 @@ def make_predict_fn():
model = make_model() model = make_model()
def predict(*args): def predict(*args):
# model = make_model() # service_estimator = make_model()
return model.predict(np.random.random(size=(1, 784))) return model.predict(np.random.random(size=(1, 784)))
return predict return predict

View File

@ -13,7 +13,7 @@ logger = get_logger()
def main(): def main():
def predict(pdf): def predict(pdf):
# Keras model.predict stalls when model was loaded in different process # Keras service_estimator.predict stalls when service_estimator was loaded in different process
# https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python # https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python
predictor = Predictor() predictor = Predictor()
predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar) predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar)

View File

@ -1,36 +0,0 @@
import numpy as np
import pytest
from image_prediction.estimator.mock import EstimatorMock
from image_prediction.model.mock import ModelMock
@pytest.fixture(scope="session")
def estimator():
    """Session-wide `EstimatorMock` instance shared by the tests."""
    mock = EstimatorMock()
    return mock
@pytest.fixture(scope="session")
def batches(batch_size):
    """Build a random input batch and an equally shaped integer output batch.

    Args:
        batch_size: Size of the leading dimension of both arrays.

    Returns:
        Tuple `(input_batch, output_batch)`, both of shape `(batch_size, 10, 15)`.
    """
    shape = (batch_size, 10, 15)
    input_batch = np.random.normal(size=shape)
    # `high` is exclusive, so every entry of the output batch is 42.
    output_batch = np.random.randint(low=42, high=43, size=shape)
    return input_batch, output_batch
@pytest.fixture(scope="session")
def classes():
    """Class labels available to the tests."""
    labels = ["A", "B", "C"]
    return labels
@pytest.fixture(scope="session")
def model(model_type, estimator):
    """Create the model under test for the requested `model_type`.

    Args:
        model_type: Name of the model flavor; only "mock" is supported.
        estimator: Estimator to wrap.

    Raises:
        ValueError: If `model_type` is not a supported flavor. Without this the
            fixture would return None and the test would fail later with an
            unrelated AttributeError.
    """
    if model_type == "mock":
        return ModelMock(estimator)
    raise ValueError(f"Unsupported model_type: {model_type!r}")
@pytest.mark.parametrize("model_type", ["mock"], scope="session")
@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")
def test_predict(model, batches):
    """The model's prediction must equal the output batch set on its estimator."""
    input_batch, expected = batches
    # Presumably the mock estimator returns whatever is stored in `output_batch`.
    model.estimator.output_batch = expected
    predicted = model.predict(input_batch)
    elementwise_match = np.equal(predicted, expected)
    assert np.all(elementwise_match)

View File

@ -0,0 +1,41 @@
import numpy as np
import pytest
from image_prediction.estimator.mock import EstimatorMock
from image_prediction.service_estimator.mock import ServiceEstimatorMock
@pytest.fixture(scope="session")
def estimator():
    """Session-wide `EstimatorMock` instance shared by the tests."""
    mock = EstimatorMock()
    return mock
@pytest.fixture(scope="session")
def batches(batch_size, classes):
    """Build a random input batch and one random numeric label per sample.

    Args:
        batch_size: Number of samples in the batch.
        classes: Available class labels; numeric labels index into this.

    Returns:
        Tuple `(input_batch, output_batch)`: inputs of shape
        `(batch_size, 10, 15)` and `batch_size` integer labels drawn from
        `[0, len(classes))`.
    """
    n_classes = len(classes)
    input_batch = np.random.normal(size=(batch_size, 10, 15))
    output_batch = np.random.randint(low=0, high=n_classes, size=batch_size)
    return input_batch, output_batch
@pytest.fixture(scope="session")
def classes():
    """Class labels that numeric predictions are mapped onto."""
    labels = ["A", "B", "C"]
    return labels
def map_labels(numeric_labels, classes):
    """Translate numeric labels into class labels by indexing into `classes`.

    Args:
        numeric_labels: Iterable of indices into `classes`.
        classes: Indexable container of class labels.

    Returns:
        List holding `classes[index]` for every index, in input order.
    """
    labels = []
    for index in numeric_labels:
        labels.append(classes[index])
    return labels
@pytest.fixture(scope="session")
def service_estimator(model_type, estimator, classes):
    """Create the service estimator under test for the requested `model_type`.

    Args:
        model_type: Name of the flavor to build; only "mock" is supported.
        estimator: Estimator to wrap.
        classes: Class labels the numeric predictions are mapped onto.

    Raises:
        ValueError: If `model_type` is not a supported flavor. Without this the
            fixture would return None and the test would fail later with an
            unrelated AttributeError.
    """
    if model_type == "mock":
        return ServiceEstimatorMock(estimator, classes)
    raise ValueError(f"Unsupported model_type: {model_type!r}")
@pytest.mark.parametrize("model_type", ["mock"], scope="session")
@pytest.mark.parametrize("batch_size", [0, 1, 2, 16, 32, 64], scope="session")
def test_predict(service_estimator, batches, classes):
    """Predictions must be the class labels of the estimator's numeric output."""
    input_batch, numeric_labels = batches
    # Presumably the mock estimator returns whatever is stored in `output_batch`.
    service_estimator.estimator.output_batch = numeric_labels
    expected_predictions = map_labels(numeric_labels, classes)
    actual_predictions = service_estimator.predict(input_batch)
    assert actual_predictions == expected_predictions