From ad45e2c1dad0f5636c43bbe32ae352d9902dcf15 Mon Sep 17 00:00:00 2001 From: Francisco Schulz Date: Thu, 16 Feb 2023 10:46:55 +0100 Subject: [PATCH 1/3] update reference to pyinfra --- incl/pyinfra | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/incl/pyinfra b/incl/pyinfra index 64d6a8c..c97ae3d 160000 --- a/incl/pyinfra +++ b/incl/pyinfra @@ -1 +1 @@ -Subproject commit 64d6a8cec62eeddf26bd71a9aabc28b40dcec901 +Subproject commit c97ae3d2c242dfc88a342955311dd488cb9a5f60 From eef371e2a867a95da0f023af239d42af9c6fb9d8 Mon Sep 17 00:00:00 2001 From: Francisco Schulz Date: Thu, 16 Feb 2023 10:47:13 +0100 Subject: [PATCH 2/3] update serve.py to work with new pyinfra version --- src/serve.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/serve.py b/src/serve.py index 81405bd..c384aa3 100644 --- a/src/serve.py +++ b/src/serve.py @@ -22,27 +22,29 @@ def analysis_callback(queue_message: dict): "dossierId", "fileId", "targetFileExtension", "responseFileExtension", "operation" )(queue_message) bucket = PYINFRA_CONFIG.storage_bucket - logging.info(f"Processing {dossier_id=}/{file_id=}, {operation=}.") + logging.info("running operation %s file_id=%s and dossier_id=%s", operation, file_id, dossier_id) storage = get_storage(PYINFRA_CONFIG) object_name = f"{dossier_id}/{file_id}.{target_file_ext}" if storage.exists(bucket, object_name): - should_publish_result = True - object_bytes = gzip.decompress(storage.get_object(bucket, object_name)) analysis_fn = get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images) results = analysis_fn(object_bytes) + logging.info("predictions ready for file_id=%s and dossier_id=%s", file_id, dossier_id) + response = {**queue_message, "data": list(results)} response = gzip.compress(json.dumps(response).encode()) response_name = f"{dossier_id}/{file_id}.{response_file_ext}" + logging.info("storing predictions for file_id=%s and dossier_id=%s", file_id, dossier_id) storage.put_object(bucket, response_name, response) - else: - should_publish_result = False + + return {"dossierId": dossier_id, "fileId": file_id} - return should_publish_result, {"dossierId": dossier_id, "fileId": file_id} + else: + return None if __name__ == "__main__": From 5bb9282da6aa1d75182c2172c601bed534099b0f Mon Sep 17 00:00:00 2001 From: Francisco Schulz Date: Thu, 16 Feb 2023 11:00:06 +0100 Subject: [PATCH 3/3] use python 3.8 in build --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1f43274..3d7768c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10 +FROM python:3.8 RUN python -m venv /app/venv ENV PATH="/app/venv/bin:$PATH"