Pull request #42: Bugfix/RED-5277 heartbeat
Merge in RR/image-prediction from bugfix/RED-5277-heartbeat to master * commit '846f127d3ba75c1be124ddc780a4f9c849dc84af': update reference fix type remove commented out code import logger from `__init__.py` add log config to `__init__.py` remove extra stream handler update reference update refrence update reference update reference update reference build dev image and push to nexus add logging & only return one object from `process_request()` cache loaded pipeline & disable tqdm output by default format + set verbose to False by default update
This commit is contained in:
commit
2001e9d7f3
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,7 +1,8 @@
|
||||
.vscode/
|
||||
*.h5
|
||||
/venv/
|
||||
*venv*
|
||||
.idea/
|
||||
src/data
|
||||
|
||||
!.gitignore
|
||||
*.project
|
||||
@ -172,4 +173,4 @@ fabric.properties
|
||||
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||
.idea/codestream.xml
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm
|
||||
# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm
|
||||
@ -22,7 +22,8 @@ then
|
||||
else
|
||||
newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
|
||||
echo "gitTag=${newVersion}" > git.tag
|
||||
echo "dev build with tag ${newVersion}"
|
||||
dev_tag="dev"
|
||||
echo "dev build with tag $dev_tag"
|
||||
python3 -m venv build_venv
|
||||
source build_venv/bin/activate
|
||||
python3 -m pip install --upgrade pip
|
||||
@ -34,7 +35,8 @@ else
|
||||
echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
|
||||
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
|
||||
docker build -f Dockerfile_base -t $SERVICE_NAME_BASE .
|
||||
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
|
||||
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:$dev_tag .
|
||||
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:$dev_tag
|
||||
exit 0
|
||||
fi
|
||||
|
||||
@ -53,4 +55,4 @@ echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iq
|
||||
docker build -f Dockerfile_base -t $SERVICE_NAME_BASE .
|
||||
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
|
||||
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
|
||||
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}
|
||||
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}
|
||||
26
config.yaml
26
config.yaml
@ -1,26 +1,24 @@
|
||||
webserver:
|
||||
host: $SERVER_HOST|"127.0.0.1" # Webserver address
|
||||
port: $SERVER_PORT|5000 # Webserver port
|
||||
host: $SERVER_HOST|"127.0.0.1" # webserver address
|
||||
port: $SERVER_PORT|5000 # webserver port
|
||||
|
||||
service:
|
||||
logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
|
||||
verbose: $VERBOSE|True # Service prints document processing progress to stdout
|
||||
batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
|
||||
mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
|
||||
|
||||
logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
|
||||
verbose: $VERBOSE|False # Service DOES NOT prints document processing progress to stdout
|
||||
batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
|
||||
mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
|
||||
|
||||
# These variables control filters that are applied to either images, image metadata or service_estimator predictions.
|
||||
# The filter result values are reported in the service responses. For convenience the response to a request contains a
|
||||
# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its
|
||||
# specified required value.
|
||||
filters:
|
||||
|
||||
image_to_page_quotient: # Image size to page size ratio (ratio of geometric means of areas)
|
||||
min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
|
||||
max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
|
||||
min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
|
||||
max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
|
||||
|
||||
image_width_to_height_quotient: # Image width to height ratio
|
||||
min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
|
||||
max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
|
||||
image_width_to_height_quotient: # Image width to height ratio
|
||||
min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
|
||||
max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
|
||||
|
||||
min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence
|
||||
min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence
|
||||
|
||||
@ -0,0 +1,13 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# log config
|
||||
LOG_FORMAT = "%(asctime)s [%(levelname)s] - [%(filename)s -> %(funcName)s() -> %(lineno)s] : %(message)s"
|
||||
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
|
||||
stream_handler = logging.StreamHandler(sys.stdout)
|
||||
stream_handler_format = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
|
||||
stream_handler.setFormatter(stream_handler_format)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.propagate = False
|
||||
logger.addHandler(stream_handler)
|
||||
@ -1,7 +1,6 @@
|
||||
import os
|
||||
from functools import partial
|
||||
from functools import lru_cache, partial
|
||||
from itertools import chain, tee
|
||||
from typing import Iterable
|
||||
|
||||
from funcy import rcompose, first, compose, second, chunks, identity, rpartial
|
||||
from tqdm import tqdm
|
||||
@ -20,6 +19,7 @@ from image_prediction.utils.generic import lift, starlift
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def load_pipeline(**kwargs):
|
||||
model_loader = get_mlflow_model_loader(MLRUNS_DIR)
|
||||
model_identifier = CONFIG.service.mlflow_run_id
|
||||
@ -38,7 +38,7 @@ def star(f):
|
||||
|
||||
|
||||
class Pipeline:
|
||||
def __init__(self, model_loader, model_identifier, batch_size=16, verbose=True, **kwargs):
|
||||
def __init__(self, model_loader, model_identifier, batch_size=16, verbose=False, **kwargs):
|
||||
self.verbose = verbose
|
||||
|
||||
extract = get_extractor(**kwargs)
|
||||
|
||||
@ -1 +1 @@
|
||||
Subproject commit 64d6a8cec62eeddf26bd71a9aabc28b40dcec901
|
||||
Subproject commit c97ae3d2c242dfc88a342955311dd488cb9a5f60
|
||||
21
src/serve.py
21
src/serve.py
@ -2,6 +2,7 @@ import gzip
|
||||
import json
|
||||
import logging
|
||||
|
||||
from image_prediction import logger
|
||||
from image_prediction.config import Config
|
||||
from image_prediction.locations import CONFIG_FILE
|
||||
from image_prediction.pipeline import load_pipeline
|
||||
@ -14,8 +15,6 @@ from pyinfra.storage.storage import get_storage
|
||||
PYINFRA_CONFIG = config.get_config()
|
||||
IMAGE_CONFIG = Config(CONFIG_FILE)
|
||||
|
||||
logging.getLogger().addHandler(logging.StreamHandler())
|
||||
logger = logging.getLogger("main")
|
||||
logger.setLevel(PYINFRA_CONFIG.logging_level_root)
|
||||
|
||||
|
||||
@ -28,28 +27,34 @@ logger.setLevel(PYINFRA_CONFIG.logging_level_root)
|
||||
def process_request(request_message):
|
||||
dossier_id = request_message["dossierId"]
|
||||
file_id = request_message["fileId"]
|
||||
logger.info(f"Processing {dossier_id=} {file_id=} ...")
|
||||
target_file_name = f"{dossier_id}/{file_id}.{request_message['targetFileExtension']}"
|
||||
response_file_name = f"{dossier_id}/{file_id}.{request_message['responseFileExtension']}"
|
||||
logger.info("Processing file %s w/ file_id=%s and dossier_id=%s", target_file_name, file_id, dossier_id)
|
||||
|
||||
bucket = PYINFRA_CONFIG.storage_bucket
|
||||
storage = get_storage(PYINFRA_CONFIG)
|
||||
|
||||
logger.debug("loading model pipeline")
|
||||
pipeline = load_pipeline(verbose=IMAGE_CONFIG.service.verbose, batch_size=IMAGE_CONFIG.service.batch_size)
|
||||
|
||||
if not storage.exists(bucket, target_file_name):
|
||||
publish_result = False
|
||||
else:
|
||||
publish_result = True
|
||||
if storage.exists(bucket, target_file_name):
|
||||
logger.info("fetching file for file_id=%s and dossier_id=%s", file_id, dossier_id)
|
||||
object_bytes = storage.get_object(bucket, target_file_name)
|
||||
object_bytes = gzip.decompress(object_bytes)
|
||||
|
||||
classifications = list(pipeline(pdf=object_bytes))
|
||||
logger.info("predictions ready for file_id=%s and dossier_id=%s", file_id, dossier_id)
|
||||
|
||||
result = {**request_message, "data": classifications}
|
||||
storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
|
||||
|
||||
logger.info("storing predictions for file_id=%s and dossier_id=%s", file_id, dossier_id)
|
||||
storage.put_object(bucket, response_file_name, storage_bytes)
|
||||
|
||||
return publish_result, {"dossierId": dossier_id, "fileId": file_id}
|
||||
return {"dossierId": dossier_id, "fileId": file_id}
|
||||
else:
|
||||
logger.info("no files found for file_id=%s and dossier_id=%s", file_id, dossier_id)
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user