Pull request #42: Bugfix/RED-5277 heartbeat

Merge in RR/image-prediction from bugfix/RED-5277-heartbeat to master

* commit '846f127d3ba75c1be124ddc780a4f9c849dc84af':
  update reference
  fix type
  remove commented out code
  import logger from `__init__.py`
  add log config to `__init__.py`
  remove extra stream handler
  update reference
  update reference
  update reference
  update reference
  update reference
  build dev image and push to nexus
  add logging & only return one object from `process_request()`
  cache loaded pipeline & disable tqdm output by default
  format + set verbose to False by default
  update
Author: Francisco Schulz
Date: 2023-02-16 09:54:07 +01:00
Commit: 2001e9d7f3

7 changed files with 50 additions and 31 deletions

.gitignore

@@ -1,7 +1,8 @@
 .vscode/
 *.h5
 /venv/
+*venv*
 .idea/
 src/data
 !.gitignore
 *.project
@@ -172,4 +173,4 @@ fabric.properties
 # https://plugins.jetbrains.com/plugin/12206-codestream
 .idea/codestream.xml
 
-# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm
+# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm


@@ -22,7 +22,8 @@ then
 else
   newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
   echo "gitTag=${newVersion}" > git.tag
-  echo "dev build with tag ${newVersion}"
+  dev_tag="dev"
+  echo "dev build with tag $dev_tag"
   python3 -m venv build_venv
   source build_venv/bin/activate
   python3 -m pip install --upgrade pip
@@ -34,7 +35,8 @@ else
   echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
   echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
   docker build -f Dockerfile_base -t $SERVICE_NAME_BASE .
-  docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
+  docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:$dev_tag .
+  docker push nexus.iqser.com:5001/red/$SERVICE_NAME:$dev_tag
   exit 0
 fi
@@ -53,4 +55,4 @@ echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iq
 docker build -f Dockerfile_base -t $SERVICE_NAME_BASE .
 docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
 echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
-docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}
+docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}


@@ -1,26 +1,24 @@
 webserver:
-  host: $SERVER_HOST|"127.0.0.1" # Webserver address
-  port: $SERVER_PORT|5000 # Webserver port
+  host: $SERVER_HOST|"127.0.0.1" # webserver address
+  port: $SERVER_PORT|5000 # webserver port
 service:
-  logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
-  verbose: $VERBOSE|True # Service prints document processing progress to stdout
-  batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
-  mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
+  logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
+  verbose: $VERBOSE|False # Service does not print document processing progress to stdout by default
+  batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
+  mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
 
 # These variables control filters that are applied to either images, image metadata or service_estimator predictions.
 # The filter result values are reported in the service responses. For convenience, the response to a request contains a
 # "filters.allPassed" field, which is set to false if any of the values returned by the filters does not meet its
 # specified required value.
 filters:
   image_to_page_quotient: # Image size to page size ratio (ratio of geometric means of areas)
-    min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
-    max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
+    min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
+    max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
-  image_width_to_height_quotient: # Image width to height ratio
-    min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
-    max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
+  image_width_to_height_quotient: # Image width to height ratio
+    min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
+    max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
-  min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence
+  min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence
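A note on the filter semantics above: the quotients are plain ratios, and "filters.allPassed" is just the conjunction of the individual range checks. A minimal Python sketch of the arithmetic, with hypothetical helper names that are not part of this PR:

    import math

    def image_to_page_quotient(image_w, image_h, page_w, page_h):
        # Ratio of the geometric means of the image area and the page area,
        # as described by the config comment above.
        return math.sqrt(image_w * image_h) / math.sqrt(page_w * page_h)

    def all_passed(values, limits):
        # "filters.allPassed" is false as soon as any reported filter value
        # falls outside its configured [min, max] range.
        return all(
            limits[name]["min"] <= value <= limits[name]["max"]
            for name, value in values.items()
        )

    # A 300x400 px image on a 1200x1600 px page gives a quotient of 0.25,
    # inside the default [0.05, 0.75] range above.
    print(image_to_page_quotient(300, 400, 1200, 1600))  # 0.25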


@@ -0,0 +1,13 @@
+import logging
+import sys
+
+# log config
+LOG_FORMAT = "%(asctime)s [%(levelname)s] - [%(filename)s -> %(funcName)s() -> %(lineno)s] : %(message)s"
+DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+stream_handler = logging.StreamHandler(sys.stdout)
+stream_handler_format = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
+stream_handler.setFormatter(stream_handler_format)
+logger = logging.getLogger(__name__)
+logger.propagate = False
+logger.addHandler(stream_handler)
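With this package-level logger in place, consumers only import it instead of configuring their own handlers (main.py below does exactly that). A minimal usage sketch; the printed line is illustrative output in the format configured above, not a captured log:

    from image_prediction import logger

    logger.setLevel("INFO")  # stays at the default WARNING level until a caller sets it
    logger.info("pipeline loaded")
    # 2023-02-16 09:54:07 [INFO] - [example.py -> <module>() -> 4] : pipeline loaded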


@@ -1,7 +1,6 @@
 import os
-from functools import partial
+from functools import lru_cache, partial
 from itertools import chain, tee
 from typing import Iterable
 from funcy import rcompose, first, compose, second, chunks, identity, rpartial
 from tqdm import tqdm
@@ -20,6 +19,7 @@ from image_prediction.utils.generic import lift, starlift
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
+@lru_cache(maxsize=None)
 def load_pipeline(**kwargs):
     model_loader = get_mlflow_model_loader(MLRUNS_DIR)
     model_identifier = CONFIG.service.mlflow_run_id
@@ -38,7 +38,7 @@ def star(f):
 class Pipeline:
-    def __init__(self, model_loader, model_identifier, batch_size=16, verbose=True, **kwargs):
+    def __init__(self, model_loader, model_identifier, batch_size=16, verbose=False, **kwargs):
        self.verbose = verbose
        extract = get_extractor(**kwargs)
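The @lru_cache(maxsize=None) decorator is what "cache loaded pipeline" refers to: the first call to load_pipeline() with a given set of keyword arguments builds the pipeline, and every later call with the same arguments returns the cached object instead of reloading the model. A self-contained sketch of the mechanism (standalone example, not this repo's code):

    from functools import lru_cache

    @lru_cache(maxsize=None)
    def load_model(run_id, batch_size=16):
        print(f"loading model {run_id} ...")  # runs only on a cache miss
        return object()  # stands in for the expensive mlflow model load

    first = load_model("run-a", batch_size=16)   # prints, performs the load
    second = load_model("run-a", batch_size=16)  # cache hit: same object, no reload
    assert first is second

Note that the cache keys on the call arguments, so anything forwarded to load_pipeline() through **kwargs must be hashable.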

@@ -1 +1 @@
-Subproject commit 64d6a8cec62eeddf26bd71a9aabc28b40dcec901
+Subproject commit c97ae3d2c242dfc88a342955311dd488cb9a5f60


@@ -2,6 +2,7 @@ import gzip
 import json
 import logging
 
+from image_prediction import logger
 from image_prediction.config import Config
 from image_prediction.locations import CONFIG_FILE
 from image_prediction.pipeline import load_pipeline
@@ -14,8 +15,6 @@ from pyinfra.storage.storage import get_storage
 PYINFRA_CONFIG = config.get_config()
 IMAGE_CONFIG = Config(CONFIG_FILE)
 
-logging.getLogger().addHandler(logging.StreamHandler())
-logger = logging.getLogger("main")
 logger.setLevel(PYINFRA_CONFIG.logging_level_root)
@@ -28,28 +27,34 @@ logger.setLevel(PYINFRA_CONFIG.logging_level_root)
 def process_request(request_message):
     dossier_id = request_message["dossierId"]
     file_id = request_message["fileId"]
-    logger.info(f"Processing {dossier_id=} {file_id=} ...")
     target_file_name = f"{dossier_id}/{file_id}.{request_message['targetFileExtension']}"
     response_file_name = f"{dossier_id}/{file_id}.{request_message['responseFileExtension']}"
+    logger.info("Processing file %s w/ file_id=%s and dossier_id=%s", target_file_name, file_id, dossier_id)
 
     bucket = PYINFRA_CONFIG.storage_bucket
     storage = get_storage(PYINFRA_CONFIG)
+    logger.debug("loading model pipeline")
     pipeline = load_pipeline(verbose=IMAGE_CONFIG.service.verbose, batch_size=IMAGE_CONFIG.service.batch_size)
 
-    if not storage.exists(bucket, target_file_name):
-        publish_result = False
-    else:
-        publish_result = True
+    if storage.exists(bucket, target_file_name):
+        logger.info("fetching file for file_id=%s and dossier_id=%s", file_id, dossier_id)
         object_bytes = storage.get_object(bucket, target_file_name)
         object_bytes = gzip.decompress(object_bytes)
         classifications = list(pipeline(pdf=object_bytes))
+        logger.info("predictions ready for file_id=%s and dossier_id=%s", file_id, dossier_id)
         result = {**request_message, "data": classifications}
         storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
+        logger.info("storing predictions for file_id=%s and dossier_id=%s", file_id, dossier_id)
         storage.put_object(bucket, response_file_name, storage_bytes)
-    return publish_result, {"dossierId": dossier_id, "fileId": file_id}
+        return {"dossierId": dossier_id, "fileId": file_id}
+    else:
+        logger.info("no files found for file_id=%s and dossier_id=%s", file_id, dossier_id)
+        return None
 
 def main():
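With process_request() now returning a single object, the old (publish_result, payload) tuple is gone: the payload itself signals whether there is anything to publish. A hypothetical caller-side sketch of how main() could use that contract (names are illustrative, not from this diff):

    def handle_message(request_message, publisher):
        result = process_request(request_message)
        if result is not None:
            publisher.publish(result)  # file existed, predictions were stored
        else:
            logger.info("nothing to publish for this message")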