add table processing time monitoring

This commit is contained in:
Julius Unverfehrt 2023-03-16 17:32:32 +01:00
parent 08c0096c07
commit 1a4ae6735d
6 changed files with 2190 additions and 2216 deletions

View File

@ -10,15 +10,12 @@ WORKDIR /app/service
COPY ./requirements.txt ./requirements.txt
RUN python3 -m pip install -r requirements.txt
COPY ./incl/pyinfra/requirements.txt ./incl/pyinfra/requirements.txt
RUN python -m pip install -r incl/pyinfra/requirements.txt
COPY ./incl/pdf2image/requirements.txt ./incl/pdf2image/requirements.txt
RUN python -m pip install -r incl/pdf2image/requirements.txt
COPY ./incl ./incl
RUN python3 -m pip install -e incl/pyinfra
RUN python3 -m pip install incl/pyinfra
RUN python3 -m pip install -e incl/pdf2image
COPY ./src ./src

@ -1 +1 @@
Subproject commit 46157031b588b4bda03f8ddbb9c2e2fadbb47af5
Subproject commit ff6f437e8491d48c1e0ccb08ad7b164477e15fbe

4375
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -23,8 +23,6 @@ dvc-ssh = "^2.20.0"
scipy = "^1.8.0"
coverage = "^5.5"
dependency-check = "^0.6.0"
prometheus-client = "^0.13.1"
prometheus_flask_exporter = "^0.19.0"
lorem-text = "^2.1"
PyMuPDF = "^1.19.6"

View File

@ -14,8 +14,6 @@ dvc[ssh]
scipy~=1.8.0
coverage~=5.5
dependency-check~=0.6.0
prometheus-client~=0.13.1
prometheus_flask_exporter~=0.19.0
lorem-text==2.1
# pdf2array

View File

@ -3,10 +3,13 @@ import json
import logging
from operator import itemgetter
from funcy import compose
from cv_analysis.config import get_config
from cv_analysis.server.pipeline import get_analysis_pipeline
from cv_analysis.utils.banner import make_art
from pyinfra import config as pyinfra_config
from pyinfra.payload_processing.monitor import get_monitor
from pyinfra.queue.queue_manager import QueueManager
from pyinfra.storage.storage import get_storage
@ -16,6 +19,10 @@ CV_CONFIG = get_config()
logging.basicConfig(level=PYINFRA_CONFIG.logging_level_root)
# TODO: add kwargs/operation key passing to the processing fn in pyinfra's PayloadProcessor to be able to use it here.
MONITOR = get_monitor(PYINFRA_CONFIG)
def analysis_callback(queue_message: dict):
dossier_id, file_id, target_file_ext, response_file_ext, operation = itemgetter(
@ -29,18 +36,23 @@ def analysis_callback(queue_message: dict):
if storage.exists(bucket, object_name):
object_bytes = gzip.decompress(storage.get_object(bucket, object_name))
analysis_fn = get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images)
analysis_fn = MONITOR(
compose(
list,
get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images),
)
)
results = analysis_fn(object_bytes)
results = list(analysis_fn(object_bytes))
logging.info("predictions ready for file_id=%s and dossier_id=%s", file_id, dossier_id)
response = {**queue_message, "data": list(results)}
response = {**queue_message, "data": results}
response = gzip.compress(json.dumps(response).encode())
response_name = f"{dossier_id}/{file_id}.{response_file_ext}"
logging.info("storing predictions for file_id=%s and dossier_id=%s", file_id, dossier_id)
storage.put_object(bucket, response_name, response)
return {"dossierId": dossier_id, "fileId": file_id}
else: