Adjust pyinfra to give standard image-service response

This commit is contained in:
Julius Unverfehrt 2022-03-02 09:34:38 +01:00
parent 8def5ed4d6
commit 7a21a64a56
4 changed files with 79 additions and 6 deletions

View File

@ -10,6 +10,7 @@ A configuration is located in `/config.yaml`. All relevant variables can be conf
|-------------------------------|--------------------------------|--------------------------------------------------------------------------------|
| _service_ | | |
| LOGGING_LEVEL_ROOT | DEBUG | Logging level for service logger |
| RESPONSE_AS_FILE | False | Whether the response is stored as file on storage or sent as stream |
| RESPONSE_FILE_EXTENSION | ".NER_ENTITIES.json.gz" | Extension to the file that stores the analyzed response on storage |
| _probing_webserver_ | | |
| PROBING_WEBSERVER_HOST | "0.0.0.0" | Probe webserver address |
@ -38,6 +39,75 @@ A configuration is located in `/config.yaml`. All relevant variables can be conf
| STORAGE_SECRET | | |
| STORAGE_AZURECONNECTIONSTRING | "DefaultEndpointsProtocol=..." | |
## Response Format
### RESPONSE_AS_FILE == False
Response-Format:
```json
{
"dossierId": "klaus",
"fileId": "1a7fd8ac0da7656a487b68f89188be82",
"imageMetadata": ANALYSIS_DATA
}
```
Response-example for image-prediction
```json
{
"dossierId": "klaus",
"fileId": "1a7fd8ac0da7656a487b68f89188be82",
"imageMetadata": [
{
"classification": {
"label": "logo",
"probabilities": {
"formula": 0.0,
"logo": 1.0,
"other": 0.0,
"signature": 0.0
}
},
"filters": {
"allPassed": true,
"geometry": {
"imageFormat": {
"quotient": 1.570791527313267,
"tooTall": false,
"tooWide": false
},
"imageSize": {
"quotient": 0.19059804229011604,
"tooLarge": false,
"tooSmall": false
}
},
"probability": {
"unconfident": false
}
},
"geometry": {
"height": 107.63999999999999,
"width": 169.08000000000004
},
"position": {
"pageNumber": 1,
"x1": 213.12,
"x2": 382.20000000000005,
"y1": 568.7604,
"y2": 676.4004
}
}
]
}
```
### RESPONSE_AS_FILE == True
Creates a response file on the request storage, named `dossier_Id / file_Id + RESPONSE_FILE_EXTENSION`, with the `ANALYSIS_DATA` as content.
## Development
### Local Setup

View File

@ -1,8 +1,8 @@
service:
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger
response:
save: True # file-to-storage upload
extension: $RESPONSE_FILE_EXTENSION|".NER_ENTITIES.json.gz" # {.OBJECTS.json.gz | .NER_ENTITIES.json.gz}
save: $RESPONSE_AS_FILE|False # Whether the response is stored as file on storage or sent as stream
extension: $RESPONSE_FILE_EXTENSION|".NER_ENTITIES.json.gz" # {.IMAGE_INFO.json.gz | .NER_ENTITIES.json.gz}
probing_webserver:
host: $PROBING_WEBSERVER_HOST|"0.0.0.0" # Probe webserver address
@ -36,7 +36,7 @@ storage:
backend: $STORAGE_BACKEND|s3 # The type of storage to use {s3, azure}
bucket: $STORAGE_BUCKET|"pyinfra-test-bucket" # The bucket / container to pull files specified in queue requests from
target_file_extension: $TARGET_FILE_EXTENSION|".TEXT.json.gz" # {.TEXT.json.gz | .ORIGIN.pdf.gz} Defines type of file to pull from storage
target_file_extension: $TARGET_FILE_EXTENSION|".ORIGIN.pdf.gz" # {.TEXT.json.gz | .ORIGIN.pdf.gz} Defines type of file to pull from storage
s3:
endpoint: $STORAGE_ENDPOINT|"http://127.0.0.1:9000"

View File

@ -62,15 +62,18 @@ def make_callback_for_output_queue(json_wrapped_body_processor, output_queue_nam
dossier_id, file_id, result = json_wrapped_body_processor(body)
# TODO Unify analysis Repsonse for image-prediction and ner-prediction
if not CONFIG.service.response.save:
channel.basic_publish(exchange="", routing_key=output_queue_name, body=result)
result = { "dossierId": dossier_id, "fileId": file_id, "imageMetadata": result}
result = json.dumps(result)
else:
result = json.dumps(result)
upload_compressed_response(
get_storage(CONFIG.storage.backend), CONFIG.storage.bucket, dossier_id, file_id, result
)
result = json.dumps({"dossierId": dossier_id, "fileId": file_id})
channel.basic_publish(exchange="", routing_key=output_queue_name, body=result)
channel.basic_publish(exchange="", routing_key=output_queue_name, body=result)
channel.basic_ack(delivery_tag=method.delivery_tag)
return callback

View File

@ -62,7 +62,7 @@ def make_payload_processor(load_data, analyze_file):
dossier_id, file_id = itemgetter("dossierId", "fileId")(payload)
data = load_data(payload)
predictions = analyze_file(data)
return dossier_id, file_id, json.dumps(predictions)
return dossier_id, file_id, predictions
except (DataLoadingFailure, AnalysisFailure) as err:
logging.warning(f"Processing of {payload} failed.")
raise ProcessingFailure() from err