add logging

This commit is contained in:
Francisco Schulz 2023-02-06 16:47:29 +01:00
parent d573b93693
commit 6e7e127cd3
3 changed files with 13 additions and 0 deletions

View File

@ -1,4 +1,5 @@
from typing import Iterable
import logging
from funcy import juxt
@ -19,8 +20,11 @@ from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCo
from image_prediction.transformer.transformers.response import ResponseTransformer
from pdf2img.extraction import extract_images_via_metadata
logger = logging.getLogger(__name__)
def get_mlflow_model_loader(mlruns_dir):
    """Build a ``ModelLoader`` backed by an MLflow reader.

    Args:
        mlruns_dir: Passed unchanged to ``MlflowModelReader``.

    Returns:
        A ``ModelLoader`` wrapping an ``MlflowConnector`` that in turn wraps
        the ``MlflowModelReader`` created from ``mlruns_dir``.
    """
    logger.debug("fetching model loader")
    # Assemble the chain inside-out: reader -> connector -> loader.
    reader = MlflowModelReader(mlruns_dir)
    connector = MlflowConnector(reader)
    return ModelLoader(connector)

View File

@ -1,4 +1,5 @@
import os
import logging
from functools import partial
from itertools import chain, tee
from typing import Iterable
@ -17,14 +18,18 @@ from image_prediction.utils.generic import lift, starlift
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
logger = logging.getLogger(__name__)
def load_model(mlruns_dir, config):
    """Load the image classifier for the MLflow run named in ``config``.

    Args:
        mlruns_dir: Forwarded to ``get_mlflow_model_loader``.
        config: Object exposing ``service.mlflow_run_id``, which is used as
            the model identifier.

    Returns:
        Whatever ``get_image_classifier`` returns for the loader and run id.
    """
    logger.debug("loading model")
    # Build the loader before reading the config, as the original does.
    loader = get_mlflow_model_loader(mlruns_dir)
    run_id = config.service.mlflow_run_id
    classifier = get_image_classifier(loader, run_id)
    return classifier
def load_pipeline(**kwargs):
    """Create a ``Pipeline``, forwarding every keyword argument unchanged.

    Args:
        **kwargs: Passed straight through to the ``Pipeline`` constructor.

    Returns:
        The newly constructed ``Pipeline`` instance.
    """
    logger.debug("loading pipeline")
    pipeline = Pipeline(**kwargs)
    return pipeline
@ -38,6 +43,7 @@ def star(f):
class Pipeline:
def __init__(self, model, batch_size=16, verbose=True, **kwargs):
logger.debug("Pipeline class instantiated")
self.verbose = verbose
extract = get_dispatched_extract(**kwargs)

View File

@ -40,12 +40,14 @@ def process_request(request_message, bucket=BUCKET, storage=STORAGE, model=MODEL
pipeline = load_pipeline(model=model, verbose=img_config.service.verbose, batch_size=img_config.service.batch_size)
if storage.exists(bucket, target_file_name):
logger.debug("processing target_file_name %s", target_file_name)
should_publish_result = True
object_bytes = storage.get_object(bucket, target_file_name)
object_bytes = gzip.decompress(object_bytes)
classifications = list(pipeline(pdf=object_bytes))
if storage.exists(bucket, figure_data_file_name):
logger.debug("processing figure_data_file_name %s", figure_data_file_name)
metadata_bytes = storage.get_object(bucket, figure_data_file_name)
metadata_bytes = gzip.decompress(metadata_bytes)
metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
@ -54,6 +56,7 @@ def process_request(request_message, bucket=BUCKET, storage=STORAGE, model=MODEL
classifications_cv = []
result = {**request_message, "data": classifications, "dataCV": classifications_cv}
logger.debug("result message:\n%s", result)
storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
storage.put_object(bucket, response_file_name, storage_bytes)
else: