diff --git a/src/image_prediction/default_objects.py b/src/image_prediction/default_objects.py index d66d477..1682e74 100644 --- a/src/image_prediction/default_objects.py +++ b/src/image_prediction/default_objects.py @@ -1,4 +1,5 @@ from typing import Iterable +import logging from funcy import juxt @@ -19,8 +20,11 @@ from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCo from image_prediction.transformer.transformers.response import ResponseTransformer from pdf2img.extraction import extract_images_via_metadata +logger = logging.getLogger(__name__) + def get_mlflow_model_loader(mlruns_dir): + logger.debug("fetching model loader") model_loader = ModelLoader(MlflowConnector(MlflowModelReader(mlruns_dir))) return model_loader diff --git a/src/image_prediction/pipeline.py b/src/image_prediction/pipeline.py index d69e453..61d5b99 100644 --- a/src/image_prediction/pipeline.py +++ b/src/image_prediction/pipeline.py @@ -1,4 +1,5 @@ import os +import logging from functools import partial from itertools import chain, tee from typing import Iterable @@ -17,14 +18,18 @@ from image_prediction.utils.generic import lift, starlift os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +logger = logging.getLogger(__name__) + def load_model(mlruns_dir, config): + logger.debug("loading model") model_loader = get_mlflow_model_loader(mlruns_dir) model_identifier = config.service.mlflow_run_id return get_image_classifier(model_loader, model_identifier) def load_pipeline(**kwargs): + logger.debug("loading pipeline") return Pipeline(**kwargs) @@ -38,6 +43,7 @@ def star(f): class Pipeline: def __init__(self, model, batch_size=16, verbose=True, **kwargs): + logger.debug("Pipeline class instantiated") self.verbose = verbose extract = get_dispatched_extract(**kwargs) diff --git a/src/serve.py b/src/serve.py index de94f03..51caf8e 100644 --- a/src/serve.py +++ b/src/serve.py @@ -40,12 +40,14 @@ def process_request(request_message, bucket=BUCKET, storage=STORAGE, model=MODEL pipeline = load_pipeline(model=model, verbose=img_config.service.verbose, batch_size=img_config.service.batch_size) if storage.exists(bucket, target_file_name): + logger.debug("processing target_file_name %s", target_file_name) should_publish_result = True object_bytes = storage.get_object(bucket, target_file_name) object_bytes = gzip.decompress(object_bytes) classifications = list(pipeline(pdf=object_bytes)) if storage.exists(bucket, figure_data_file_name): + logger.debug("processing figure_data_file_name %s", figure_data_file_name) metadata_bytes = storage.get_object(bucket, figure_data_file_name) metadata_bytes = gzip.decompress(metadata_bytes) metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"] @@ -54,6 +56,7 @@ def process_request(request_message, bucket=BUCKET, storage=STORAGE, model=MODEL classifications_cv = [] result = {**request_message, "data": classifications, "dataCV": classifications_cv} + logger.debug("result message:\n%s", result) storage_bytes = gzip.compress(json.dumps(result).encode("utf-8")) storage.put_object(bucket, response_file_name, storage_bytes) else: