add table processing time monitoring
This commit is contained in:
parent
08c0096c07
commit
1a4ae6735d
@ -10,15 +10,12 @@ WORKDIR /app/service
|
||||
COPY ./requirements.txt ./requirements.txt
|
||||
RUN python3 -m pip install -r requirements.txt
|
||||
|
||||
COPY ./incl/pyinfra/requirements.txt ./incl/pyinfra/requirements.txt
|
||||
RUN python -m pip install -r incl/pyinfra/requirements.txt
|
||||
|
||||
COPY ./incl/pdf2image/requirements.txt ./incl/pdf2image/requirements.txt
|
||||
RUN python -m pip install -r incl/pdf2image/requirements.txt
|
||||
|
||||
COPY ./incl ./incl
|
||||
|
||||
RUN python3 -m pip install -e incl/pyinfra
|
||||
RUN python3 -m pip install incl/pyinfra
|
||||
RUN python3 -m pip install -e incl/pdf2image
|
||||
|
||||
COPY ./src ./src
|
||||
|
||||
@ -1 +1 @@
|
||||
Subproject commit 46157031b588b4bda03f8ddbb9c2e2fadbb47af5
|
||||
Subproject commit ff6f437e8491d48c1e0ccb08ad7b164477e15fbe
|
||||
4375
poetry.lock
generated
4375
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -23,8 +23,6 @@ dvc-ssh = "^2.20.0"
|
||||
scipy = "^1.8.0"
|
||||
coverage = "^5.5"
|
||||
dependency-check = "^0.6.0"
|
||||
prometheus-client = "^0.13.1"
|
||||
prometheus_flask_exporter = "^0.19.0"
|
||||
lorem-text = "^2.1"
|
||||
PyMuPDF = "^1.19.6"
|
||||
|
||||
|
||||
@ -14,8 +14,6 @@ dvc[ssh]
|
||||
scipy~=1.8.0
|
||||
coverage~=5.5
|
||||
dependency-check~=0.6.0
|
||||
prometheus-client~=0.13.1
|
||||
prometheus_flask_exporter~=0.19.0
|
||||
lorem-text==2.1
|
||||
|
||||
# pdf2array
|
||||
|
||||
20
src/serve.py
20
src/serve.py
@ -3,10 +3,13 @@ import json
|
||||
import logging
|
||||
from operator import itemgetter
|
||||
|
||||
from funcy import compose
|
||||
|
||||
from cv_analysis.config import get_config
|
||||
from cv_analysis.server.pipeline import get_analysis_pipeline
|
||||
from cv_analysis.utils.banner import make_art
|
||||
from pyinfra import config as pyinfra_config
|
||||
from pyinfra.payload_processing.monitor import get_monitor
|
||||
from pyinfra.queue.queue_manager import QueueManager
|
||||
from pyinfra.storage.storage import get_storage
|
||||
|
||||
@ -16,6 +19,10 @@ CV_CONFIG = get_config()
|
||||
logging.basicConfig(level=PYINFRA_CONFIG.logging_level_root)
|
||||
|
||||
|
||||
# TODO: add kwargs/operation-key passing to the processing fn in pyinfra's PayloadProcessor to be able to use it here.
|
||||
MONITOR = get_monitor(PYINFRA_CONFIG)
|
||||
|
||||
|
||||
def analysis_callback(queue_message: dict):
|
||||
|
||||
dossier_id, file_id, target_file_ext, response_file_ext, operation = itemgetter(
|
||||
@ -29,18 +36,23 @@ def analysis_callback(queue_message: dict):
|
||||
|
||||
if storage.exists(bucket, object_name):
|
||||
object_bytes = gzip.decompress(storage.get_object(bucket, object_name))
|
||||
analysis_fn = get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images)
|
||||
analysis_fn = MONITOR(
|
||||
compose(
|
||||
list,
|
||||
get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images),
|
||||
)
|
||||
)
|
||||
|
||||
results = analysis_fn(object_bytes)
|
||||
results = list(analysis_fn(object_bytes))
|
||||
logging.info("predictions ready for file_id=%s and dossier_id=%s", file_id, dossier_id)
|
||||
|
||||
response = {**queue_message, "data": list(results)}
|
||||
response = {**queue_message, "data": results}
|
||||
response = gzip.compress(json.dumps(response).encode())
|
||||
response_name = f"{dossier_id}/{file_id}.{response_file_ext}"
|
||||
|
||||
logging.info("storing predictions for file_id=%s and dossier_id=%s", file_id, dossier_id)
|
||||
storage.put_object(bucket, response_name, response)
|
||||
|
||||
|
||||
return {"dossierId": dossier_id, "fileId": file_id}
|
||||
|
||||
else:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user