add new pyinfra, add optional image classification under key dataCV if figure metadata is present on storage

2022-08-12 13:37:48 +02:00 · 2022-08-12 13:37:48 +02:00 · 2b2da1b60c
commit 2b2da1b60c
parent bae25bedbd
5 changed files with 11 additions and 17 deletions
--- a/image_prediction/default_objects.py
+++ b/image_prediction/default_objects.py
@ -30,12 +30,6 @@ def get_image_classifier(model_loader, model_identifier):
    return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes)))


-# def get_extractor(**kwargs):
-#     image_extractor = ParsablePDFImageExtractor(**kwargs)
-#
-#     return image_extractor
-
-
 def get_dispatched_extract(**kwargs):
    image_extractor = ParsablePDFImageExtractor(**kwargs)

--- a/image_prediction/formatter/formatter.py
+++ b/image_prediction/formatter/formatter.py
@ -21,9 +21,9 @@ class Formatter(Transformer):

 def format_image_plus(image: ImagePlus) -> ImageMetadataPair:
    enum_metadata = {
-        Info.PAGE_WIDTH: image.info.pageInfo.width,
-        Info.PAGE_HEIGHT: image.info.pageInfo.height,
-        Info.PAGE_IDX: image.info.pageInfo.number,
+        Info.PAGE_WIDTH: image.info.pageInfo.pageWidth,
+        Info.PAGE_HEIGHT: image.info.pageInfo.pageHeight,
+        Info.PAGE_IDX: image.info.pageInfo.pageNumber,
        Info.ALPHA: image.info.alpha,
        Info.WIDTH: image.info.boundingBox.width,
        Info.HEIGHT: image.info.boundingBox.height,
--- a/incl/pdf2image
+++ b/incl/pdf2image
@ -1 +1 @@
-Subproject commit 699568875683ba727ec9759c8bea85e0d3e1d369
+Subproject commit fee87964cb7da0ea0c19410ca418849744474302
--- a/incl/pyinfra
+++ b/incl/pyinfra
@ -1 +1 @@
-Subproject commit 0f6512df5423df98d334f5735170cd1f7642998a
+Subproject commit be82114f8302ffedecf950c6ca9fecf01ece5573
--- a/src/serve.py
+++ b/src/serve.py
@ -31,17 +31,17 @@ def process_request(request_message):
    object_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{target_file_extension}")
    object_bytes = gzip.decompress(object_bytes)

-    try:  # TODO: add figure detection file target to request message to avoid this
+    if storage.exists(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz"):
        metadata_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz")
        metadata_bytes = gzip.decompress(metadata_bytes)
        metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
-        logger.info("Metadata acquired")
-    except:
-        metadata_per_image = None
+        classifications_cv = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
+    else:
+        classifications_cv = []

-    classifications = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
+    classifications = list(pipeline(pdf=object_bytes))

-    result = {**request_message, "data": classifications}
+    result = {**request_message, "data": classifications, "dataCV": classifications_cv}

    response_file_extension = request_message["responseFileExtension"]
    storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))