diff --git a/README.md b/README.md index 437a1e6..0be99e4 100755 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ A configuration is located in `/config.yaml`. All relevant variables can be conf |-------------------------------|--------------------------------|--------------------------------------------------------------------------------| | _service_ | | | | LOGGING_LEVEL_ROOT | DEBUG | Logging level for service logger | +| RESPONSE_AS_FILE | False | Whether the response is stored as file on storage or sent as stream | | RESPONSE_FILE_EXTENSION | ".NER_ENTITIES.json.gz" | Extension to the file that stores the analyized response on storage | | _probing_webserver_ | | | | PROBING_WEBSERVER_HOST | "0.0.0.0" | Probe webserver address | @@ -38,6 +39,75 @@ A configuration is located in `/config.yaml`. All relevant variables can be conf | STORAGE_SECRET | | | | STORAGE_AZURECONNECTIONSTRING | "DefaultEndpointsProtocol=..." | | +## Response Format + +### RESPONSE_AS_FILE == False + +Response-Format: + +```json +{ + "dossierId": "klaus", + "fileId": "1a7fd8ac0da7656a487b68f89188be82", + "imageMetadata": ANALYSIS_DATA +} +``` + +Response-example for image-prediction + +```json +{ + "dossierId": "klaus", + "fileId": "1a7fd8ac0da7656a487b68f89188be82", + "imageMetadata": [ + { + "classification": { + "label": "logo", + "probabilities": { + "formula": 0.0, + "logo": 1.0, + "other": 0.0, + "signature": 0.0 + } + }, + "filters": { + "allPassed": true, + "geometry": { + "imageFormat": { + "quotient": 1.570791527313267, + "tooTall": false, + "tooWide": false + }, + "imageSize": { + "quotient": 0.19059804229011604, + "tooLarge": false, + "tooSmall": false + } + }, + "probability": { + "unconfident": false + } + }, + "geometry": { + "height": 107.63999999999999, + "width": 169.08000000000004 + }, + "position": { + "pageNumber": 1, + "x1": 213.12, + "x2": 382.20000000000005, + "y1": 568.7604, + "y2": 676.4004 + } + } + ] +} +``` + +### RESPONSE_AS_FILE == True + +Creates a response 
file on the request storage, named `dossier_Id / file_Id + RESPONSE_FILE_EXTENSION` with the `ANALYSIS_DATA` as content. + ## Development ### Local Setup diff --git a/config.yaml b/config.yaml index a15aee5..c25a203 100755 --- a/config.yaml +++ b/config.yaml @@ -1,8 +1,8 @@ service: logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger response: - save: True # file-to-storage upload - extension: $RESPONSE_FILE_EXTENSION|".NER_ENTITIES.json.gz" # {.OBJECTS.json.gz | .NER_ENTITIES.json.gz} + save: $RESPONSE_AS_FILE|False # Whether the response is stored as file on storage or sent as stream + extension: $RESPONSE_FILE_EXTENSION|".NER_ENTITIES.json.gz" # {.IMAGE_INFO.json.gz | .NER_ENTITIES.json.gz} probing_webserver: host: $PROBING_WEBSERVER_HOST|"0.0.0.0" # Probe webserver address @@ -36,7 +36,7 @@ storage: backend: $STORAGE_BACKEND|s3 # The type of storage to use {s3, azure} bucket: $STORAGE_BUCKET|"pyinfra-test-bucket" # The bucket / container to pull files specified in queue requests from - target_file_extension: $TARGET_FILE_EXTENSION|".TEXT.json.gz" # {.TEXT.json.gz | .ORIGIN.pdf.gz} Defines type of file to pull from storage + target_file_extension: $TARGET_FILE_EXTENSION|".ORIGIN.pdf.gz" # {.TEXT.json.gz | .ORIGIN.pdf.gz} Defines type of file to pull from storage s3: endpoint: $STORAGE_ENDPOINT|"http://127.0.0.1:9000" diff --git a/pyinfra/callback.py b/pyinfra/callback.py index 5684bec..9d7d744 100644 --- a/pyinfra/callback.py +++ b/pyinfra/callback.py @@ -62,15 +62,18 @@ def make_callback_for_output_queue(json_wrapped_body_processor, output_queue_nam dossier_id, file_id, result = json_wrapped_body_processor(body) + # TODO Unify analysis response for image-prediction and ner-prediction if not CONFIG.service.response.save: - channel.basic_publish(exchange="", routing_key=output_queue_name, body=result) + result = { "dossierId": dossier_id, "fileId": file_id, "imageMetadata": result} + result = json.dumps(result) else: + result = 
json.dumps(result) upload_compressed_response( get_storage(CONFIG.storage.backend), CONFIG.storage.bucket, dossier_id, file_id, result ) result = json.dumps({"dossierId": dossier_id, "fileId": file_id}) - channel.basic_publish(exchange="", routing_key=output_queue_name, body=result) + channel.basic_publish(exchange="", routing_key=output_queue_name, body=result) channel.basic_ack(delivery_tag=method.delivery_tag) return callback diff --git a/pyinfra/core.py b/pyinfra/core.py index 9a6c82f..2a5bd5f 100644 --- a/pyinfra/core.py +++ b/pyinfra/core.py @@ -62,7 +62,7 @@ def make_payload_processor(load_data, analyze_file): dossier_id, file_id = itemgetter("dossierId", "fileId")(payload) data = load_data(payload) predictions = analyze_file(data) - return dossier_id, file_id, json.dumps(predictions) + return dossier_id, file_id, predictions except (DataLoadingFailure, AnalysisFailure) as err: logging.warning(f"Processing of {payload} failed.") raise ProcessingFailure() from err