Merge in RR/image-prediction from RED-6084-adhoc-scanned-pages-filtering-refactoring to master
Squashed commit of the following:
commit bd6d83e7363b1c1993babcceb434110a6312c645
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Thu Feb 9 16:08:25 2023 +0100
Tweak logging
commit 55bdd48d2a3462a8b4a6b7194c4a46b21d74c455
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Thu Feb 9 15:47:31 2023 +0100
Update dependencies
commit 970275b25708c05e4fbe78b52aa70d791d5ff17a
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Thu Feb 9 15:35:37 2023 +0100
Refactoring
Make alpha channel check monadic to streamline error handling
commit e99e97e23fd8ce16f9a421d3e5442fccacf71ead
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Tue Feb 7 14:32:29 2023 +0100
Refactoring
- Rename
- Refactor image extraction functions
commit 76b1b0ca2401495ec03ba2b6483091b52732eb81
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Tue Feb 7 11:55:30 2023 +0100
Refactoring
commit cb1c461049d7c43ec340302f466447da9f95a499
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Tue Feb 7 11:44:01 2023 +0100
Refactoring
commit 092069221a85ac7ac19bf838dcbc7ab1fde1e12b
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Tue Feb 7 10:18:53 2023 +0100
Add to-do
commit 3cea4dad2d9703b8c79ddeb740b66a3b8255bb2a
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Tue Feb 7 10:11:35 2023 +0100
Refactoring
- Rename
- Add typehints everywhere
commit 865e0819a14c420bc2edff454d41092c11c019a4
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 19:38:57 2023 +0100
Add type explanation
commit 01d3d5d33f1ccb05aea1cec1d1577572b1a4deaa
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 19:37:49 2023 +0100
Formatting
commit dffe1c18fc3a322a6b08890d4438844e8122faaf
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 19:34:13 2023 +0100
[WIP] Either refactoring
Add alternative formulation for monadic chain
commit 066cf17add404a313520cd794c06e3264cf971c9
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 18:40:30 2023 +0100
[WIP] Either refactoring
commit f53f0fea298cdab88deb090af328b34d37e0198e
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 18:18:34 2023 +0100
[WIP] Either refactoring
Propagate error and metadata
commit 274a5f56d4fcb9c67fac5cf43e9412ec1ab5179e
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 17:51:35 2023 +0100
[WIP] Either refactoring
Fix test assertion
commit 3235a857f6e418e50484cbfff152b0f63efb2f53
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 16:57:31 2023 +0100
[WIP] Either-refactoring
Replace Maybe with Either to allow passing on error information or
metadata which otherwise get sucked up by Nothing.
commit 89989543d87490f8b20a0a76055605d34345e8f4
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 16:12:40 2023 +0100
[WIP] Monadic refactoring
Integrate image validation step into monadic chain.
At the moment we lost the error information through this. Refactoring to
Either monad can bring it back.
commit 022bd4856a51aa085df5fe983fd77b99b53d594c
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 15:16:41 2023 +0100
[WIP] Monadic refactoring
commit ca3898cb539607c8c3dd01c57e60211a5fea8a7d
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 15:10:34 2023 +0100
[WIP] Monadic refactoring
commit d8f37bed5cbd6bdd2a0b52bae46fcdbb50f9dff2
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 15:09:51 2023 +0100
[WIP] Monadic refactoring
commit 906fee0e5df051f38076aa1d2725e52a182ade13
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date: Mon Feb 6 15:03:35 2023 +0100
[WIP] Monadic refactoring
... and 35 more commits
73 lines
2.3 KiB
Python
73 lines
2.3 KiB
Python
import os
|
|
from functools import partial
|
|
from itertools import chain, tee
|
|
from typing import Iterable
|
|
|
|
from funcy import rcompose, first, compose, second, chunks, identity, rpartial
|
|
from tqdm import tqdm
|
|
|
|
from image_prediction.config import CONFIG
|
|
from image_prediction.default_objects import (
|
|
get_formatter,
|
|
get_mlflow_model_loader,
|
|
get_image_classifier,
|
|
get_extractor,
|
|
get_encoder,
|
|
)
|
|
from image_prediction.locations import MLRUNS_DIR
|
|
from image_prediction.utils.generic import lift, starlift
|
|
|
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
|
|
|
|
def load_pipeline(**kwargs):
|
|
model_loader = get_mlflow_model_loader(MLRUNS_DIR)
|
|
model_identifier = CONFIG.service.mlflow_run_id
|
|
|
|
pipeline = Pipeline(model_loader, model_identifier, **kwargs)
|
|
|
|
return pipeline
|
|
|
|
|
|
def parallel(*fs):
|
|
return lambda *args: (f(a) for f, a in zip(fs, args))
|
|
|
|
|
|
def star(f):
|
|
return lambda x: f(*x)
|
|
|
|
|
|
class Pipeline:
|
|
def __init__(self, model_loader, model_identifier, batch_size=16, verbose=True, **kwargs):
|
|
self.verbose = verbose
|
|
|
|
extract = get_extractor(**kwargs)
|
|
classifier = get_image_classifier(model_loader, model_identifier)
|
|
reformat = get_formatter()
|
|
represent = get_encoder()
|
|
|
|
split = compose(star(parallel(*map(lift, (first, first, second)))), rpartial(tee, 3))
|
|
classify = compose(chain.from_iterable, lift(classifier), partial(chunks, batch_size))
|
|
pairwise_apply = compose(star, parallel)
|
|
join = compose(starlift(lambda prd, rpr, mdt: {"classification": prd, **mdt, "representation": rpr}), star(zip))
|
|
|
|
# />--classify--\
|
|
# --extract-->--split--+->--encode---->+--join-->reformat
|
|
# \>--identity--/
|
|
|
|
self.pipe = rcompose(
|
|
extract, # ... image-metadata-pairs as a stream
|
|
split, # ... into an image stream and a metadata stream
|
|
pairwise_apply(classify, represent, identity), # ... apply functions to the streams pairwise
|
|
join, # ... the streams by zipping
|
|
reformat, # ... the items
|
|
)
|
|
|
|
def __call__(self, pdf: bytes, page_range: range = None):
|
|
yield from tqdm(
|
|
self.pipe(pdf, page_range=page_range),
|
|
desc="Processing images from document",
|
|
unit=" images",
|
|
disable=not self.verbose,
|
|
)
|