From 2b2da1b60ce56fb006cf2f6b65aeda9774391b2a Mon Sep 17 00:00:00 2001
From: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri, 12 Aug 2022 13:37:48 +0200
Subject: [PATCH] add new pyinfra, add optional image classification under key
 dataCV if figure metadata is present on storage

---
 image_prediction/default_objects.py     |  6 ------
 image_prediction/formatter/formatter.py |  6 +++---
 incl/pdf2image                          |  2 +-
 incl/pyinfra                            |  2 +-
 src/serve.py                            | 12 ++++++------
 5 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/image_prediction/default_objects.py b/image_prediction/default_objects.py
index 3ad5579..d66d477 100644
--- a/image_prediction/default_objects.py
+++ b/image_prediction/default_objects.py
@@ -30,12 +30,6 @@ def get_image_classifier(model_loader, model_identifier):
     return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes)))
 
 
-# def get_extractor(**kwargs):
-#     image_extractor = ParsablePDFImageExtractor(**kwargs)
-#
-#     return image_extractor
-
-
 def get_dispatched_extract(**kwargs):
     image_extractor = ParsablePDFImageExtractor(**kwargs)
 
diff --git a/image_prediction/formatter/formatter.py b/image_prediction/formatter/formatter.py
index 53306a9..fdf45d1 100644
--- a/image_prediction/formatter/formatter.py
+++ b/image_prediction/formatter/formatter.py
@@ -21,9 +21,9 @@ class Formatter(Transformer):
 
 def format_image_plus(image: ImagePlus) -> ImageMetadataPair:
     enum_metadata = {
-        Info.PAGE_WIDTH: image.info.pageInfo.width,
-        Info.PAGE_HEIGHT: image.info.pageInfo.height,
-        Info.PAGE_IDX: image.info.pageInfo.number,
+        Info.PAGE_WIDTH: image.info.pageInfo.pageWidth,
+        Info.PAGE_HEIGHT: image.info.pageInfo.pageHeight,
+        Info.PAGE_IDX: image.info.pageInfo.pageNumber,
         Info.ALPHA: image.info.alpha,
         Info.WIDTH: image.info.boundingBox.width,
         Info.HEIGHT: image.info.boundingBox.height,
diff --git a/incl/pdf2image b/incl/pdf2image
index 6995688..fee8796 160000
--- a/incl/pdf2image
+++ b/incl/pdf2image
@@ -1 +1 @@
-Subproject commit 699568875683ba727ec9759c8bea85e0d3e1d369
+Subproject commit fee87964cb7da0ea0c19410ca418849744474302
diff --git a/incl/pyinfra b/incl/pyinfra
index 0f6512d..be82114 160000
--- a/incl/pyinfra
+++ b/incl/pyinfra
@@ -1 +1 @@
-Subproject commit 0f6512df5423df98d334f5735170cd1f7642998a
+Subproject commit be82114f8302ffedecf950c6ca9fecf01ece5573
diff --git a/src/serve.py b/src/serve.py
index 866b37f..719e88e 100644
--- a/src/serve.py
+++ b/src/serve.py
@@ -31,17 +31,17 @@ def process_request(request_message):
     object_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{target_file_extension}")
     object_bytes = gzip.decompress(object_bytes)
 
-    try:  # TODO: add figure detection file target to request message to avoid this
+    if storage.exists(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz"):
         metadata_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz")
         metadata_bytes = gzip.decompress(metadata_bytes)
         metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
-        logger.info("Metadata acquired")
-    except:
-        metadata_per_image = None
+        classifications_cv = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
+    else:
+        classifications_cv = []
 
-    classifications = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
+    classifications = list(pipeline(pdf=object_bytes))
 
-    result = {**request_message, "data": classifications}
+    result = {**request_message, "data": classifications, "dataCV": classifications_cv}
 
     response_file_extension = request_message["responseFileExtension"]
     storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))