Adjust pyinfra to give standard image-service response

This commit is contained in:
Julius Unverfehrt 2022-03-02 09:34:38 +01:00
parent 8def5ed4d6
commit 7a21a64a56
4 changed files with 79 additions and 6 deletions

View File

@ -10,6 +10,7 @@ A configuration is located in `/config.yaml`. All relevant variables can be conf
|-------------------------------|--------------------------------|--------------------------------------------------------------------------------|
| _service_ | | |
| LOGGING_LEVEL_ROOT | DEBUG | Logging level for service logger |
| RESPONSE_AS_FILE | False | Whether the response is stored as file on storage or sent as stream |
| RESPONSE_FILE_EXTENSION | ".NER_ENTITIES.json.gz" | Extension to the file that stores the analyzed response on storage |
| _probing_webserver_ | | |
| PROBING_WEBSERVER_HOST | "0.0.0.0" | Probe webserver address |
@ -38,6 +39,75 @@ A configuration is located in `/config.yaml`. All relevant variables can be conf
| STORAGE_SECRET | | |
| STORAGE_AZURECONNECTIONSTRING | "DefaultEndpointsProtocol=..." | |
## Response Format
### RESPONSE_AS_FILE == False
Response-Format:
```json
{
"dossierId": "klaus",
"fileId": "1a7fd8ac0da7656a487b68f89188be82",
"imageMetadata": ANALYSIS_DATA
}
```
Response-example for image-prediction
```json
{
"dossierId": "klaus",
"fileId": "1a7fd8ac0da7656a487b68f89188be82",
"imageMetadata": [
{
"classification": {
"label": "logo",
"probabilities": {
"formula": 0.0,
"logo": 1.0,
"other": 0.0,
"signature": 0.0
}
},
"filters": {
"allPassed": true,
"geometry": {
"imageFormat": {
"quotient": 1.570791527313267,
"tooTall": false,
"tooWide": false
},
"imageSize": {
"quotient": 0.19059804229011604,
"tooLarge": false,
"tooSmall": false
}
},
"probability": {
"unconfident": false
}
},
"geometry": {
"height": 107.63999999999999,
"width": 169.08000000000004
},
"position": {
"pageNumber": 1,
"x1": 213.12,
"x2": 382.20000000000005,
"y1": 568.7604,
"y2": 676.4004
}
}
]
}
```
### RESPONSE_AS_FILE == True
Creates a response file on the request storage, named `dossier_Id / file_Id + RESPONSE_FILE_EXTENSION`, with the `ANALYSIS_DATA` as content.
## Development
### Local Setup

View File

@ -1,8 +1,8 @@
service:
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger
response:
save: True # file-to-storage upload
extension: $RESPONSE_FILE_EXTENSION|".NER_ENTITIES.json.gz" # {.OBJECTS.json.gz | .NER_ENTITIES.json.gz}
save: $RESPONSE_AS_FILE|False # Whether the response is stored as file on storage or sent as stream
extension: $RESPONSE_FILE_EXTENSION|".NER_ENTITIES.json.gz" # {.IMAGE_INFO.json.gz | .NER_ENTITIES.json.gz}
probing_webserver:
host: $PROBING_WEBSERVER_HOST|"0.0.0.0" # Probe webserver address
@ -36,7 +36,7 @@ storage:
backend: $STORAGE_BACKEND|s3 # The type of storage to use {s3, azure}
bucket: $STORAGE_BUCKET|"pyinfra-test-bucket" # The bucket / container to pull files specified in queue requests from
target_file_extension: $TARGET_FILE_EXTENSION|".TEXT.json.gz" # {.TEXT.json.gz | .ORIGIN.pdf.gz} Defines type of file to pull from storage
target_file_extension: $TARGET_FILE_EXTENSION|".ORIGIN.pdf.gz" # {.TEXT.json.gz | .ORIGIN.pdf.gz} Defines type of file to pull from storage
s3:
endpoint: $STORAGE_ENDPOINT|"http://127.0.0.1:9000"

View File

@ -62,15 +62,18 @@ def make_callback_for_output_queue(json_wrapped_body_processor, output_queue_nam
dossier_id, file_id, result = json_wrapped_body_processor(body)
# TODO Unify analysis Repsonse for image-prediction and ner-prediction
if not CONFIG.service.response.save:
channel.basic_publish(exchange="", routing_key=output_queue_name, body=result)
result = { "dossierId": dossier_id, "fileId": file_id, "imageMetadata": result}
result = json.dumps(result)
else:
result = json.dumps(result)
upload_compressed_response(
get_storage(CONFIG.storage.backend), CONFIG.storage.bucket, dossier_id, file_id, result
)
result = json.dumps({"dossierId": dossier_id, "fileId": file_id})
channel.basic_publish(exchange="", routing_key=output_queue_name, body=result)
channel.basic_publish(exchange="", routing_key=output_queue_name, body=result)
channel.basic_ack(delivery_tag=method.delivery_tag)
return callback

View File

@ -62,7 +62,7 @@ def make_payload_processor(load_data, analyze_file):
dossier_id, file_id = itemgetter("dossierId", "fileId")(payload)
data = load_data(payload)
predictions = analyze_file(data)
return dossier_id, file_id, json.dumps(predictions)
return dossier_id, file_id, predictions
except (DataLoadingFailure, AnalysisFailure) as err:
logging.warning(f"Processing of {payload} failed.")
raise ProcessingFailure() from err