add logging

This commit is contained in:
Francisco Schulz 2023-02-06 16:47:29 +01:00
parent d573b93693
commit 6e7e127cd3
3 changed files with 13 additions and 0 deletions

View File

@@ -1,4 +1,5 @@
from typing import Iterable from typing import Iterable
import logging
from funcy import juxt from funcy import juxt
@@ -19,8 +20,11 @@ from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCo
from image_prediction.transformer.transformers.response import ResponseTransformer from image_prediction.transformer.transformers.response import ResponseTransformer
from pdf2img.extraction import extract_images_via_metadata from pdf2img.extraction import extract_images_via_metadata
logger = logging.getLogger(__name__)
def get_mlflow_model_loader(mlruns_dir):
    """Build a ``ModelLoader`` on top of the given ``mlruns_dir``.

    ``mlruns_dir`` is wrapped in an ``MlflowModelReader``, the reader is
    wrapped in an ``MlflowConnector``, and the connector backs the
    ``ModelLoader`` that is returned.
    """
    logger.debug("fetching model loader")
    reader = MlflowModelReader(mlruns_dir)
    connector = MlflowConnector(reader)
    return ModelLoader(connector)

View File

@@ -1,4 +1,5 @@
import os import os
import logging
from functools import partial from functools import partial
from itertools import chain, tee from itertools import chain, tee
from typing import Iterable from typing import Iterable
@@ -17,14 +18,18 @@ from image_prediction.utils.generic import lift, starlift
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
logger = logging.getLogger(__name__)
def load_model(mlruns_dir, config):
    """Return the image classifier selected by the service configuration.

    Args:
        mlruns_dir: Forwarded to ``get_mlflow_model_loader`` to build the
            model loader.
        config: Configuration object; ``config.service.mlflow_run_id`` is
            used as the model identifier.

    Returns:
        The result of ``get_image_classifier`` for that loader and identifier.
    """
    logger.debug("loading model")
    # Arguments are evaluated left to right: loader first, then the run id.
    return get_image_classifier(
        get_mlflow_model_loader(mlruns_dir),
        config.service.mlflow_run_id,
    )
def load_pipeline(**kwargs):
    """Create a ``Pipeline``, forwarding every keyword argument unchanged."""
    logger.debug("loading pipeline")
    pipeline = Pipeline(**kwargs)
    return pipeline
@@ -38,6 +43,7 @@ def star(f):
class Pipeline: class Pipeline:
def __init__(self, model, batch_size=16, verbose=True, **kwargs): def __init__(self, model, batch_size=16, verbose=True, **kwargs):
logger.debug("Pipeline class instantiated")
self.verbose = verbose self.verbose = verbose
extract = get_dispatched_extract(**kwargs) extract = get_dispatched_extract(**kwargs)

View File

@@ -40,12 +40,14 @@ def process_request(request_message, bucket=BUCKET, storage=STORAGE, model=MODEL
pipeline = load_pipeline(model=model, verbose=img_config.service.verbose, batch_size=img_config.service.batch_size) pipeline = load_pipeline(model=model, verbose=img_config.service.verbose, batch_size=img_config.service.batch_size)
if storage.exists(bucket, target_file_name): if storage.exists(bucket, target_file_name):
logger.debug("processing target_file_name %s", target_file_name)
should_publish_result = True should_publish_result = True
object_bytes = storage.get_object(bucket, target_file_name) object_bytes = storage.get_object(bucket, target_file_name)
object_bytes = gzip.decompress(object_bytes) object_bytes = gzip.decompress(object_bytes)
classifications = list(pipeline(pdf=object_bytes)) classifications = list(pipeline(pdf=object_bytes))
if storage.exists(bucket, figure_data_file_name): if storage.exists(bucket, figure_data_file_name):
logger.debug("processing figure_data_file_name %s", figure_data_file_name)
metadata_bytes = storage.get_object(bucket, figure_data_file_name) metadata_bytes = storage.get_object(bucket, figure_data_file_name)
metadata_bytes = gzip.decompress(metadata_bytes) metadata_bytes = gzip.decompress(metadata_bytes)
metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"] metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
@@ -54,6 +56,7 @@ def process_request(request_message, bucket=BUCKET, storage=STORAGE, model=MODEL
classifications_cv = [] classifications_cv = []
result = {**request_message, "data": classifications, "dataCV": classifications_cv} result = {**request_message, "data": classifications, "dataCV": classifications_cv}
logger.debug("result message:\n%s", result)
storage_bytes = gzip.compress(json.dumps(result).encode("utf-8")) storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
storage.put_object(bucket, response_file_name, storage_bytes) storage.put_object(bucket, response_file_name, storage_bytes)
else: else: