add new pyinfra, add optional image classification under key dataCV if figure metadata is present on storage

This commit is contained in:
Julius Unverfehrt 2022-08-12 13:37:48 +02:00
parent bae25bedbd
commit 2b2da1b60c
5 changed files with 11 additions and 17 deletions

View File

@ -30,12 +30,6 @@ def get_image_classifier(model_loader, model_identifier):
return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes)))
# def get_extractor(**kwargs):
# image_extractor = ParsablePDFImageExtractor(**kwargs)
#
# return image_extractor
def get_dispatched_extract(**kwargs):
image_extractor = ParsablePDFImageExtractor(**kwargs)

View File

@ -21,9 +21,9 @@ class Formatter(Transformer):
def format_image_plus(image: ImagePlus) -> ImageMetadataPair:
enum_metadata = {
Info.PAGE_WIDTH: image.info.pageInfo.width,
Info.PAGE_HEIGHT: image.info.pageInfo.height,
Info.PAGE_IDX: image.info.pageInfo.number,
Info.PAGE_WIDTH: image.info.pageInfo.pageWidth,
Info.PAGE_HEIGHT: image.info.pageInfo.pageHeight,
Info.PAGE_IDX: image.info.pageInfo.pageNumber,
Info.ALPHA: image.info.alpha,
Info.WIDTH: image.info.boundingBox.width,
Info.HEIGHT: image.info.boundingBox.height,

@ -1 +1 @@
Subproject commit 699568875683ba727ec9759c8bea85e0d3e1d369
Subproject commit fee87964cb7da0ea0c19410ca418849744474302

@ -1 +1 @@
Subproject commit 0f6512df5423df98d334f5735170cd1f7642998a
Subproject commit be82114f8302ffedecf950c6ca9fecf01ece5573

View File

@ -31,17 +31,17 @@ def process_request(request_message):
object_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{target_file_extension}")
object_bytes = gzip.decompress(object_bytes)
try: # TODO: add figure detection file target to request message to avoid this
if storage.exists(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz"):
metadata_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz")
metadata_bytes = gzip.decompress(metadata_bytes)
metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
logger.info("Metadata acquired")
except:
metadata_per_image = None
classifications_cv = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
else:
classifications_cv = []
classifications = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
classifications = list(pipeline(pdf=object_bytes))
result = {**request_message, "data": classifications}
result = {**request_message, "data": classifications, "dataCV": classifications_cv}
response_file_extension = request_message["responseFileExtension"]
storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))