64 lines
2.0 KiB
Python
64 lines
2.0 KiB
Python
import os
|
|
from itertools import starmap
|
|
|
|
from funcy import rcompose, juxt, compose
|
|
|
|
from image_prediction.classifier.classifier import Classifier
|
|
from image_prediction.classifier.image_classifier import ImageClassifier
|
|
from image_prediction.config import CONFIG
|
|
from image_prediction.estimator.adapter.adapter import EstimatorAdapter
|
|
from image_prediction.extractor_classifier.extractor_classifier import ExtractorClassifier
|
|
from image_prediction.formatter.formatters.enum import EnumFormatter
|
|
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
|
|
from image_prediction.label_mapper.mappers.probability import ProbabilityMapper
|
|
from image_prediction.locations import MLRUNS_DIR
|
|
from image_prediction.model_loader.loader import ModelLoader
|
|
from image_prediction.model_loader.loaders.mlflow import MlflowConnector
|
|
from image_prediction.redai_adapter.mlflow import MlflowModelReader
|
|
|
|
# Export TF_CPP_MIN_LOG_LEVEL="3" into this process's environment.
# NOTE(review): presumably meant to quiet TensorFlow's native logging; this
# runs after the imports above -- confirm the variable is still honoured
# when it is set at this point.
os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "3"})
|
|
|
|
|
|
def get_image_classifier(model_loader, identifier):
    """Assemble the image classifier for the model named by ``identifier``.

    The model returned by ``model_loader.load_model(identifier)`` is wrapped
    in an ``EstimatorAdapter`` and the classes returned by
    ``model_loader.load_classes(identifier)`` in a ``ProbabilityMapper``.
    Both are handed, in that order, to ``Classifier``, whose result is in
    turn wrapped in an ``ImageClassifier``.

    Args:
        model_loader: Object exposing ``load_model`` and ``load_classes``.
        identifier: Value passed unchanged to both loader methods.

    Returns:
        The ``ImageClassifier`` wrapping the assembled ``Classifier``.
    """
    # Resolve both loader methods before calling either one.
    load_model = model_loader.load_model
    load_classes = model_loader.load_classes

    estimator = EstimatorAdapter(load_model(identifier))
    label_mapper = ProbabilityMapper(load_classes(identifier))

    return ImageClassifier(Classifier(estimator, label_mapper))
|
|
|
|
|
|
def get_extractor():
    """Return a new ``ParsablePDFImageExtractor`` created with ``verbose=True``."""
    return ParsablePDFImageExtractor(verbose=True)
|
|
|
|
|
|
def get_extractor_classifier(model_loader, identifier):
    """Combine the image extractor and the image classifier.

    Args:
        model_loader: Forwarded to ``get_image_classifier``.
        identifier: Forwarded to ``get_image_classifier``.

    Returns:
        An ``ExtractorClassifier`` built from ``get_extractor()`` and
        ``get_image_classifier(model_loader, identifier)``.
    """
    # The extractor is created first, then the classifier.
    extractor = get_extractor()
    classifier = get_image_classifier(model_loader, identifier)
    return ExtractorClassifier(extractor, classifier)
|
|
|
|
|
|
def get_formatter():
    """Return a new ``EnumFormatter`` instance."""
    return EnumFormatter()
|
|
|
|
|
|
class Pipeline:
    """Callable that feeds PDF bytes through extraction/classification and formatting.

    ``self.pipe`` is the left-to-right composition of the extractor-classifier
    and the formatter: the formatter receives whatever the
    extractor-classifier returns.
    """

    def __init__(self):
        """Build the composed pipe for the run configured in ``CONFIG.service.run_id``."""
        # The loader reads models through the MLflow reader rooted at MLRUNS_DIR.
        loader = ModelLoader(MlflowConnector(MlflowModelReader(MLRUNS_DIR)))
        run_id = CONFIG.service.run_id

        # Stages are created in the order in which they later run.
        extract_and_classify = get_extractor_classifier(loader, run_id)
        format_results = get_formatter()

        self.pipe = rcompose(extract_and_classify, format_results)

    def __call__(self, pdf: bytes):
        """Yield every item that ``self.pipe`` produces for ``pdf``."""
        results = self.pipe(pdf)
        yield from results
|