add new pyinfra, add optional image classification under key dataCV if figure metadata is present on storage
This commit is contained in:
parent
bae25bedbd
commit
2b2da1b60c
@ -30,12 +30,6 @@ def get_image_classifier(model_loader, model_identifier):
|
|||||||
return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes)))
|
return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes)))
|
||||||
|
|
||||||
|
|
||||||
# def get_extractor(**kwargs):
|
|
||||||
# image_extractor = ParsablePDFImageExtractor(**kwargs)
|
|
||||||
#
|
|
||||||
# return image_extractor
|
|
||||||
|
|
||||||
|
|
||||||
def get_dispatched_extract(**kwargs):
|
def get_dispatched_extract(**kwargs):
|
||||||
image_extractor = ParsablePDFImageExtractor(**kwargs)
|
image_extractor = ParsablePDFImageExtractor(**kwargs)
|
||||||
|
|
||||||
|
|||||||
@ -21,9 +21,9 @@ class Formatter(Transformer):
|
|||||||
|
|
||||||
def format_image_plus(image: ImagePlus) -> ImageMetadataPair:
|
def format_image_plus(image: ImagePlus) -> ImageMetadataPair:
|
||||||
enum_metadata = {
|
enum_metadata = {
|
||||||
Info.PAGE_WIDTH: image.info.pageInfo.width,
|
Info.PAGE_WIDTH: image.info.pageInfo.pageWidth,
|
||||||
Info.PAGE_HEIGHT: image.info.pageInfo.height,
|
Info.PAGE_HEIGHT: image.info.pageInfo.pageHeight,
|
||||||
Info.PAGE_IDX: image.info.pageInfo.number,
|
Info.PAGE_IDX: image.info.pageInfo.pageNumber,
|
||||||
Info.ALPHA: image.info.alpha,
|
Info.ALPHA: image.info.alpha,
|
||||||
Info.WIDTH: image.info.boundingBox.width,
|
Info.WIDTH: image.info.boundingBox.width,
|
||||||
Info.HEIGHT: image.info.boundingBox.height,
|
Info.HEIGHT: image.info.boundingBox.height,
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
Subproject commit 699568875683ba727ec9759c8bea85e0d3e1d369
|
Subproject commit fee87964cb7da0ea0c19410ca418849744474302
|
||||||
@ -1 +1 @@
|
|||||||
Subproject commit 0f6512df5423df98d334f5735170cd1f7642998a
|
Subproject commit be82114f8302ffedecf950c6ca9fecf01ece5573
|
||||||
12
src/serve.py
12
src/serve.py
@ -31,17 +31,17 @@ def process_request(request_message):
|
|||||||
object_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{target_file_extension}")
|
object_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.{target_file_extension}")
|
||||||
object_bytes = gzip.decompress(object_bytes)
|
object_bytes = gzip.decompress(object_bytes)
|
||||||
|
|
||||||
try: # TODO: add figure detection file target to request message to avoid this
|
if storage.exists(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz"):
|
||||||
metadata_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz")
|
metadata_bytes = storage.get_object(PYINFRA_CONFIG.storage_bucket, f"{dossier_id}/{file_id}.FIGURE.json.gz")
|
||||||
metadata_bytes = gzip.decompress(metadata_bytes)
|
metadata_bytes = gzip.decompress(metadata_bytes)
|
||||||
metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
|
metadata_per_image = json.load(io.BytesIO(metadata_bytes))["data"]
|
||||||
logger.info("Metadata acquired")
|
classifications_cv = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
|
||||||
except:
|
else:
|
||||||
metadata_per_image = None
|
classifications_cv = []
|
||||||
|
|
||||||
classifications = list(pipeline(pdf=object_bytes, metadata_per_image=metadata_per_image))
|
classifications = list(pipeline(pdf=object_bytes))
|
||||||
|
|
||||||
result = {**request_message, "data": classifications}
|
result = {**request_message, "data": classifications, "dataCV": classifications_cv}
|
||||||
|
|
||||||
response_file_extension = request_message["responseFileExtension"]
|
response_file_extension = request_message["responseFileExtension"]
|
||||||
storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
|
storage_bytes = gzip.compress(json.dumps(result).encode("utf-8"))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user