From 2b2da1b60ce56fb006cf2f6b65aeda9774391b2a Mon Sep 17 00:00:00 2001 From: Julius Unverfehrt Date: Fri, 12 Aug 2022 13:37:48 +0200 Subject: [PATCH] add new pyinfra, add optional image classification under key dataCV if figure metadata is present on storage --- image_prediction/default_objects.py | 6 ------ image_prediction/formatter/formatter.py | 6 +++--- incl/pdf2image | 2 +- incl/pyinfra | 2 +- src/serve.py | 12 ++++++------ 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/image_prediction/default_objects.py b/image_prediction/default_objects.py index 3ad5579..d66d477 100644 --- a/image_prediction/default_objects.py +++ b/image_prediction/default_objects.py @@ -30,12 +30,6 @@ def get_image_classifier(model_loader, model_identifier): return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes))) -# def get_extractor(**kwargs): -# image_extractor = ParsablePDFImageExtractor(**kwargs) -# -# return image_extractor - - def get_dispatched_extract(**kwargs): image_extractor = ParsablePDFImageExtractor(**kwargs) diff --git a/image_prediction/formatter/formatter.py b/image_prediction/formatter/formatter.py index 53306a9..fdf45d1 100644 --- a/image_prediction/formatter/formatter.py +++ b/image_prediction/formatter/formatter.py @@ -21,9 +21,9 @@ class Formatter(Transformer): def format_image_plus(image: ImagePlus) -> ImageMetadataPair: enum_metadata = { - Info.PAGE_WIDTH: image.info.pageInfo.width, - Info.PAGE_HEIGHT: image.info.pageInfo.height, - Info.PAGE_IDX: image.info.pageInfo.number, + Info.PAGE_WIDTH: image.info.pageInfo.pageWidth, + Info.PAGE_HEIGHT: image.info.pageInfo.pageHeight, + Info.PAGE_IDX: image.info.pageInfo.pageNumber, Info.ALPHA: image.info.alpha, Info.WIDTH: image.info.boundingBox.width, Info.HEIGHT: image.info.boundingBox.height, diff --git a/incl/pdf2image b/incl/pdf2image index 6995688..fee8796 160000 --- a/incl/pdf2image +++ b/incl/pdf2image @@ -1 +1 @@ -Subproject commit 
699568875683ba727ec9759c8bea85e0d3e1d369 +Subproject commit fee87964cb7da0ea0c19410ca418849744474302 diff --git a/incl/pyinfra b/incl/pyinfra index 0f6512d..be82114 160000 --- a/incl/pyinfra +++ b/incl/pyinfra @@ -1 +1 @@ -Subproject commit 0f6512df5423df98d334f5735170cd1f7642998a +Subproject commit be82114f8302ffedecf950c6ca9fecf01ece5573 diff --git a/src/serve.py b/src/serve.py index 866b37f..719e88e 100644 --- a/src/serve.py +++ b/src/serve.py @@ -31,17 +31,17 @@ def process_request(request_message): object_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{target_file_extension}") object_bytes = gzip.decompress(object_bytes) - try: # TODO: add figure detection file target to request message to avoid this + if storage.exists(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz"): metadata_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz") metadata_bytes = gzip.decompress(metadata_bytes) metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"] - logger.info("Metadata acquired") - except: - metadata_per_image = None + classifications_cv = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image)) + else: + classifications_cv = [] - classifications = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image)) + classifications = list(pipeline(pdf=object_bytes)) - result = {**request_message, "data": classifications} + result = {**request_message, "data": classifications, "dataCV": classifications_cv} response_file_extension = request_message["responseFileExtension"] storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))