Matthias Bisping 9f18ef9cd1 Pull request #13: Image representation info
Merge in RR/image-prediction from image_representation_metadata to master

Squashed commit of the following:

commit bfe92b24a2959a72c0e913ef051476c01c285ad0
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu May 12 11:24:12 2022 +0200

    updated comment

commit f5721560f3fda05a8ad45d0b5e406434204c1177
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu May 12 11:16:02 2022 +0200

    unskip server predict test

commit 41d94199ede7d58427b9e9541605a94f962c3dc4
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu May 12 11:15:48 2022 +0200

    added hash image encoder that produces representations by hashing

commit 84a8b0a290081616240c3876f8db8a1ae8592096
Merge: 1624ee4 6030f40
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date:   Thu May 12 10:18:56 2022 +0200

    Merge branch 'master' of ssh://git.iqser.com:2222/rr/image-prediction

commit 1624ee40376b84a4519025343f913120c464407a
Author: Matthias Bisping <Matthias.Bisping@iqser.com>
Date:   Mon Apr 25 16:51:13 2022 +0200

    Pull request #11: fixed assignment

    Merge in RR/image-prediction from image_prediction_service_overhaul_xref_and_empty_result_fix_fix to master

    Squashed commit of the following:

    commit 7312e57d1127b081bfdc6e96311e8348d3f8110d
    Author: Matthias Bisping <matthias.bisping@iqser.com>
    Date:   Mon Apr 25 16:45:12 2022 +0200

        logging setup changed

    commit 955e353d74f414ee2d57b234bdf84d32817d14bf
    Author: Matthias Bisping <matthias.bisping@iqser.com>
    Date:   Mon Apr 25 16:37:52 2022 +0200

        fixed assignment
2022-05-12 11:49:19 +02:00

72 lines
2.3 KiB
Python

import os
from functools import partial
from itertools import chain, tee
from funcy import rcompose, first, compose, second, chunks, identity, rpartial
from tqdm import tqdm
from image_prediction.config import CONFIG
from image_prediction.default_objects import (
get_formatter,
get_mlflow_model_loader,
get_image_classifier,
get_extractor,
get_encoder,
)
from image_prediction.locations import MLRUNS_DIR
from image_prediction.utils.generic import lift, starlift
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
def load_pipeline(**kwargs):
model_loader = get_mlflow_model_loader(MLRUNS_DIR)
model_identifier = CONFIG.service.mlflow_run_id
pipeline = Pipeline(model_loader, model_identifier, **kwargs)
return pipeline
def parallel(*fs):
return lambda *args: (f(a) for f, a in zip(fs, args))
def star(f):
return lambda x: f(*x)
class Pipeline:
def __init__(self, model_loader, model_identifier, batch_size=16, verbose=True, **kwargs):
self.verbose = verbose
extract = get_extractor(**kwargs)
classifier = get_image_classifier(model_loader, model_identifier)
reformat = get_formatter()
represent = get_encoder()
split = compose(star(parallel(*map(lift, (first, first, second)))), rpartial(tee, 3))
classify = compose(chain.from_iterable, lift(classifier), partial(chunks, batch_size))
pairwise_apply = compose(star, parallel)
join = compose(starlift(lambda prd, rpr, mdt: {"classification": prd, **mdt, "representation": rpr}), star(zip))
# />--classify--\
# --extract-->--split--+->--encode---->+--join-->reformat
# \>--identity--/
self.pipe = rcompose(
extract, # ... image-metadata-pairs as a stream
split, # ... into an image stream and a metadata stream
pairwise_apply(classify, represent, identity), # ... apply functions to the streams pairwise
join, # ... the streams by zipping
reformat, # ... the items
)
def __call__(self, pdf: bytes, page_range: range = None):
yield from tqdm(
self.pipe(pdf, page_range=page_range),
desc="Processing images from document",
unit=" images",
disable=not self.verbose,
)