Pull request #42: Bugfix/RED-5277 heartbeat

Merge in RR/image-prediction from bugfix/RED-5277-heartbeat to master

* commit '846f127d3ba75c1be124ddc780a4f9c849dc84af':
  update reference
  fix type
  remove commented out code
  import logger from `__init__.py`
  add log config to `__init__.py`
  remove extra stream handler
  update reference
  update refrence
  update reference
  update reference
  update reference
  build dev image and push to nexus
  add logging & only return one object from `process_request()`
  cache loaded pipeline & disable tqdm output by default
  format + set verbose to False by default
  update
This commit is contained in:
Francisco Schulz 2023-02-16 09:54:07 +01:00
commit 2001e9d7f3
7 changed files with 50 additions and 31 deletions

5
.gitignore vendored
View File

@ -1,7 +1,8 @@
.vscode/ .vscode/
*.h5 *.h5
/venv/ *venv*
.idea/ .idea/
src/data
!.gitignore !.gitignore
*.project *.project
@ -172,4 +173,4 @@ fabric.properties
# https://plugins.jetbrains.com/plugin/12206-codestream # https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml .idea/codestream.xml
# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm # End of https://www.toptal.com/developers/gitignore/api/linux,pycharm

View File

@ -22,7 +22,8 @@ then
else else
newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
echo "gitTag=${newVersion}" > git.tag echo "gitTag=${newVersion}" > git.tag
echo "dev build with tag ${newVersion}" dev_tag="dev"
echo "dev build with tag $dev_tag"
python3 -m venv build_venv python3 -m venv build_venv
source build_venv/bin/activate source build_venv/bin/activate
python3 -m pip install --upgrade pip python3 -m pip install --upgrade pip
@ -34,7 +35,8 @@ else
echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001 echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker build -f Dockerfile_base -t $SERVICE_NAME_BASE . docker build -f Dockerfile_base -t $SERVICE_NAME_BASE .
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} . docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:$dev_tag .
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:$dev_tag
exit 0 exit 0
fi fi
@ -53,4 +55,4 @@ echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iq
docker build -f Dockerfile_base -t $SERVICE_NAME_BASE . docker build -f Dockerfile_base -t $SERVICE_NAME_BASE .
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} . docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001 echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}

View File

@ -1,26 +1,24 @@
webserver: webserver:
host: $SERVER_HOST|"127.0.0.1" # Webserver address host: $SERVER_HOST|"127.0.0.1" # webserver address
port: $SERVER_PORT|5000 # Webserver port port: $SERVER_PORT|5000 # webserver port
service: service:
logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
verbose: $VERBOSE|True # Service prints document processing progress to stdout verbose: $VERBOSE|False # Service DOES NOT prints document processing progress to stdout
batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
# These variables control filters that are applied to either images, image metadata or service_estimator predictions. # These variables control filters that are applied to either images, image metadata or service_estimator predictions.
# The filter result values are reported in the service responses. For convenience the response to a request contains a # The filter result values are reported in the service responses. For convenience the response to a request contains a
# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its # "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its
# specified required value. # specified required value.
filters: filters:
image_to_page_quotient: # Image size to page size ratio (ratio of geometric means of areas) image_to_page_quotient: # Image size to page size ratio (ratio of geometric means of areas)
min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
image_width_to_height_quotient: # Image width to height ratio image_width_to_height_quotient: # Image width to height ratio
min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
max: $MAX_IMAGE_FORMAT|10 # Maximum permissible max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence

View File

@ -0,0 +1,13 @@
import logging
import sys
# log config
LOG_FORMAT = "%(asctime)s [%(levelname)s] - [%(filename)s -> %(funcName)s() -> %(lineno)s] : %(message)s"
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler_format = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
stream_handler.setFormatter(stream_handler_format)
logger = logging.getLogger(__name__)
logger.propagate = False
logger.addHandler(stream_handler)

View File

@ -1,7 +1,6 @@
import os import os
from functools import partial from functools import lru_cache, partial
from itertools import chain, tee from itertools import chain, tee
from typing import Iterable
from funcy import rcompose, first, compose, second, chunks, identity, rpartial from funcy import rcompose, first, compose, second, chunks, identity, rpartial
from tqdm import tqdm from tqdm import tqdm
@ -20,6 +19,7 @@ from image_prediction.utils.generic import lift, starlift
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@lru_cache(maxsize=None)
def load_pipeline(**kwargs): def load_pipeline(**kwargs):
model_loader = get_mlflow_model_loader(MLRUNS_DIR) model_loader = get_mlflow_model_loader(MLRUNS_DIR)
model_identifier = CONFIG.service.mlflow_run_id model_identifier = CONFIG.service.mlflow_run_id
@ -38,7 +38,7 @@ def star(f):
class Pipeline: class Pipeline:
def __init__(self, model_loader, model_identifier, batch_size=16, verbose=True, **kwargs): def __init__(self, model_loader, model_identifier, batch_size=16, verbose=False, **kwargs):
self.verbose = verbose self.verbose = verbose
extract = get_extractor(**kwargs) extract = get_extractor(**kwargs)

@ -1 +1 @@
Subproject commit 64d6a8cec62eeddf26bd71a9aabc28b40dcec901 Subproject commit c97ae3d2c242dfc88a342955311dd488cb9a5f60

View File

@ -2,6 +2,7 @@ import gzip
import json import json
import logging import logging
from image_prediction import logger
from image_prediction.config import Config from image_prediction.config import Config
from image_prediction.locations import CONFIG_FILE from image_prediction.locations import CONFIG_FILE
from image_prediction.pipeline import load_pipeline from image_prediction.pipeline import load_pipeline
@ -14,8 +15,6 @@ from pyinfra.storage.storage import get_storage
PYINFRA_CONFIG = config.get_config() PYINFRA_CONFIG = config.get_config()
IMAGE_CONFIG = Config(CONFIG_FILE) IMAGE_CONFIG = Config(CONFIG_FILE)
logging.getLogger().addHandler(logging.StreamHandler())
logger = logging.getLogger("main")
logger.setLevel(PYINFRA_CONFIG.logging_level_root) logger.setLevel(PYINFRA_CONFIG.logging_level_root)
@ -28,28 +27,34 @@ logger.setLevel(PYINFRA_CONFIG.logging_level_root)
def process_request(request_message): def process_request(request_message):
dossier_id = request_message["dossierId"] dossier_id = request_message["dossierId"]
file_id = request_message["fileId"] file_id = request_message["fileId"]
logger.info(f"Processing {dossier_id=} {file_id=} ...")
target_file_name = f"{dossier_id}/{file_id}.{request_message['targetFileExtension']}" target_file_name = f"{dossier_id}/{file_id}.{request_message['targetFileExtension']}"
response_file_name = f"{dossier_id}/{file_id}.{request_message['responseFileExtension']}" response_file_name = f"{dossier_id}/{file_id}.{request_message['responseFileExtension']}"
logger.info("Processing file %s w/ file_id=%s and dossier_id=%s", target_file_name, file_id, dossier_id)
bucket = PYINFRA_CONFIG.storage_bucket bucket = PYINFRA_CONFIG.storage_bucket
storage = get_storage(PYINFRA_CONFIG) storage = get_storage(PYINFRA_CONFIG)
logger.debug("loading model pipeline")
pipeline = load_pipeline(verbose=IMAGE_CONFIG.service.verbose, batch_size=IMAGE_CONFIG.service.batch_size) pipeline = load_pipeline(verbose=IMAGE_CONFIG.service.verbose, batch_size=IMAGE_CONFIG.service.batch_size)
if not storage.exists(bucket, target_file_name): if storage.exists(bucket, target_file_name):
publish_result = False logger.info("fetching file for file_id=%s and dossier_id=%s", file_id, dossier_id)
else:
publish_result = True
object_bytes = storage.get_object(bucket, target_file_name) object_bytes = storage.get_object(bucket, target_file_name)
object_bytes = gzip.decompress(object_bytes) object_bytes = gzip.decompress(object_bytes)
classifications = list(pipeline(pdf=object_bytes)) classifications = list(pipeline(pdf=object_bytes))
logger.info("predictions ready for file_id=%s and dossier_id=%s", file_id, dossier_id)
result = {**request_message, "data": classifications} result = {**request_message, "data": classifications}
storage_bytes = gzip.compress(json.dumps(result).encode("utf-8")) storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
logger.info("storing predictions for file_id=%s and dossier_id=%s", file_id, dossier_id)
storage.put_object(bucket, response_file_name, storage_bytes) storage.put_object(bucket, response_file_name, storage_bytes)
return publish_result, {"dossierId": dossier_id, "fileId": file_id} return {"dossierId": dossier_id, "fileId": file_id}
else:
logger.info("no files found for file_id=%s and dossier_id=%s", file_id, dossier_id)
return None
def main(): def main():