diff --git a/incl/pyinfra b/incl/pyinfra
index be82114..71ad2af 160000
--- a/incl/pyinfra
+++ b/incl/pyinfra
@@ -1 +1 @@
-Subproject commit be82114f8302ffedecf950c6ca9fecf01ece5573
+Subproject commit 71ad2af4eb278a3718ad5385b06f07faa9059e9f
diff --git a/src/serve.py b/src/serve.py
index 719e88e..65260da 100644
--- a/src/serve.py
+++ b/src/serve.py
@@ -20,36 +20,38 @@ logger.setLevel(PYINFRA_CONFIG.logging_level_root)
 
 
 def process_request(request_message):
-    pipeline = load_pipeline(verbose=IMAGE_CONFIG.service.verbose, batch_size=IMAGE_CONFIG.service.batch_size)
-
-    target_file_extension = request_message["targetFileExtension"]
     dossier_id = request_message["dossierId"]
     file_id = request_message["fileId"]
 
+    target_file_name = f"{dossier_id}/{file_id}.{request_message['targetFileExtension']}"
+    response_file_name = f"{dossier_id}/{file_id}.{request_message['responseFileExtension']}"
+    figure_data_file_name = f"{dossier_id}/{file_id}.FIGURE.json.gz"
+
+    bucket = PYINFRA_CONFIG.storage_bucket
     storage = get_storage(PYINFRA_CONFIG)
-    object_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{target_file_extension}")
-    object_bytes = gzip.decompress(object_bytes)
+    pipeline = load_pipeline(verbose=IMAGE_CONFIG.service.verbose, batch_size=IMAGE_CONFIG.service.batch_size)
 
-    if storage.exists(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz"):
-        metadata_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz")
-        metadata_bytes = gzip.decompress(metadata_bytes)
-        metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
-        classifications_cv = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
+    if storage.exists(bucket, target_file_name):
+        should_publish_result = True
+        object_bytes = storage.get_object(bucket, target_file_name)
+        object_bytes = gzip.decompress(object_bytes)
+        classifications = list(pipeline(pdf=object_bytes))
+
+        if storage.exists(bucket, figure_data_file_name):
+            metadata_bytes = storage.get_object(bucket, figure_data_file_name)
+            metadata_bytes = gzip.decompress(metadata_bytes)
+            metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
+            classifications_cv = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
+        else:
+            classifications_cv = []
+
+        result = {**request_message, "data": classifications, "dataCV": classifications_cv}
+        storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
+        storage.put_object(bucket, response_file_name, storage_bytes)
     else:
-        classifications_cv = []
+        should_publish_result = False
 
-    classifications = list(pipeline(pdf=object_bytes))
-
-    result = {**request_message, "data": classifications, "dataCV": classifications_cv}
-
-    response_file_extension = request_message["responseFileExtension"]
-    storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
-    storage.put_object(
-        PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{response_file_extension}", storage_bytes
-    )
-
-    return {"dossierId": dossier_id, "fileId": file_id}
+    return should_publish_result, {"dossierId": dossier_id, "fileId": file_id}
 
 
 def main():
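
Note (not part of the patch): the change turns process_request into a two-value return, so the caller is now responsible for deciding whether to publish. The body of main() is outside this hunk, so the sketch below is an assumption about the caller side; handle_message and publish are hypothetical names, while process_request and logger come from serve.py itself.

def handle_message(request_message, publish):
    # `publish` is a hypothetical callback that forwards the result message downstream.
    should_publish_result, response = process_request(request_message)
    if should_publish_result:
        # A response object was written to storage, so notify consumers.
        publish(response)
    else:
        # The target file was not in the bucket; nothing was processed or stored.
        logger.warning(
            "Skipping publish for %s/%s: target file not found",
            response["dossierId"],
            response["fileId"],
        )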