refactoring

alpha channel querying improved
renaming
2022-04-14 12:20:05 +02:00 · 2022-04-13 17:31:33 +02:00 · 2022-04-13 13:36:45 +02:00 · 2022-04-13 13:17:23 +02:00 · 2022-04-13 13:15:05 +02:00 · 2022-04-13 13:12:19 +02:00
118 changed files with 3442 additions and 385 deletions
--- a/.coveragerc
+++ b/.coveragerc
@ -1,6 +1,9 @@
 # .coveragerc to control coverage.py
 [run]
 branch = True
+parallel = True
+command_line = -m pytest
+concurrency = multiprocessing
 omit =
    */site-packages/*
    */distutils/*
@ -11,9 +14,11 @@ omit =
 	*/env/*
 	*/build_venv/*
 	*/build_env/*
+	*/utils/banner.py
+	*/utils/logger.py
+	*/src/*
 source =
    image_prediction
-    src
 relative_files = True
 data_file = .coverage

@ -44,6 +49,10 @@ omit =
 	*/env/*
 	*/build_venv/*
 	*/build_env/*
+	*/utils/banner.py
+	*/utils/logger.py
+	*/src/*
+	*/pdf_annotation.py

 ignore_errors = True

--- a/.dvc/config
+++ b/.dvc/config
@ -1,5 +1,6 @@
 [core]
    remote = vector
+    autostage = true
 ['remote "vector"']
-    url = ssh://vector.iqser.com/research/image_service/
+    url = ssh://vector.iqser.com/research/image-prediction/
    port = 22
--- a/.gitignore
+++ b/.gitignore
@ -32,6 +32,8 @@
 **/classpath-data.json
 **/dependencies-and-licenses-overview.txt

+.coverage
+

 *__pycache__
 *.egg-info*
@ -44,7 +46,7 @@
 *misc

 /coverage_html_report/
-.coverage
+.coverage\.*

 # Created by https://www.toptal.com/developers/gitignore/api/linux,pycharm
 # Edit at https://www.toptal.com/developers/gitignore?templates=linux,pycharm
@ -172,4 +174,4 @@ fabric.properties

 # End of https://www.toptal.com/developers/gitignore/api/linux,pycharm
 /image_prediction/data/mlruns/
-/data/mlruns/
+#/data/mlruns/
--- a/banner.txt
+++ b/banner.txt
@ -0,0 +1,11 @@
+----------------------------------------------------+
+|                                  ___               |
+|                               __/_  `.  .-"""-.    |
+|_._     _,-'""`-._             \_,` | \-'  /   )`-')|
+|(,-.`._,'(       |\`-/|         "") `"`    \  ((`"` |
+|    `-.-' \ )-`( , o o)        ___Y  ,    .'7 /|    |
+|          `-    \`_`"'-       (_,___/...-` (_/_/    |
+|                                                    |
+----------------------------------------------------+
+|            Image Classification Service            |
+----------------------------------------------------+
--- a/config.yaml
+++ b/config.yaml
@ -4,14 +4,14 @@ webserver:
  mode: $SERVER_MODE|production  # webserver mode: {development, production}

 service:
-  logging_level: $LOGGING_LEVEL_ROOT|DEBUG  # Logging level for service logger
+  logging_level: INFO  # Logging level for service logger
  progressbar: True  # Whether a progress bar over the pages of a document is displayed while processing
  batch_size: $BATCH_SIZE|32  # Number of images in memory simultaneously
  verbose: $VERBOSE|True  # Service prints document processing progress to stdout
-  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the model from
+  run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7  # The ID of the mlflow run to load the service estimator (model) from


-# These variables control filters that are applied to either images, image metadata or model predictions. The filter
+# These variables control filters that are applied to either images, image metadata or service estimator (model) predictions. The filter
 # result values are reported in the service responses. For convenience the response to a request contains a
 # "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified
 # required value.
--- a/data/.gitignore
+++ b/data/.gitignore
@ -0,0 +1 @@
+/mlruns
--- a/data/base_weights.h5.dvc
+++ b/data/base_weights.h5.dvc
@ -1,4 +0,0 @@
-outs:
- md5: 6d0186c1f25e889d531788f168fa6cf0
-  size: 16727296
-  path: base_weights.h5
--- a/data/mlruns.dvc
+++ b/data/mlruns.dvc
@ -1,5 +1,5 @@
 outs:
- md5: d1c708270bab6fcd344d4a8b05d1103d.dir
-  size: 150225383
-  nfiles: 178
+- md5: 4219c52caf5f87f5a94f1ae00c60fb91.dir
+  size: 166952679
+  nfiles: 179
  path: mlruns
--- a/doc/tests.drawio
+++ b/doc/tests.drawio
@ -0,0 +1 @@
+<mxfile host="app.diagrams.net" modified="2022-03-17T15:35:10.371Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36" etag="b-CbBXg6FXQ9T3Px-oLc" version="17.1.1" type="device"><diagram id="tS3WR_Pr6QhNVK3FqSUP" name="Page-1">1ZZRT6QwEMc/DY8mQHdRX93z9JLbmNzGmNxbQ0daLQzpDrL46a/IsCzinneJcd0XaP+dtsN/fkADscg3V06WeokKbBCHahOIb0Ecnydzf22FphPmyXknZM6oTooGYWWegcWQ1cooWI8CCdGSKcdiikUBKY006RzW47B7tONdS5nBRFil0k7VO6NId+rZPBz0azCZ7neOQh7JZR/MwlpLhfWOJC4DsXCI1LXyzQJs613vSzfv+57RbWIOCvqXCZqW9PBref27aZ7xsQ5vTn/cnvAqT9JW/MCwJuNzR8dZU9Nb4bAqFLSrhYG4qLUhWJUybUdrX3uvacqt70W+yeuCI9jsTTja2uDxAcyBXONDeILonWN04hn366EQUR+jd4qQsCa59tl26cEe32CH/sOt+TueoCONGRbS/kQs2YkHIGoYbFkRvuUTqAmFr1zyu2LlUvhLdjG/HtJlQO/VfOq6AyvJPI3z+HAL4wlwpbp/2V0qODxzUTJmLjo4c8nEkxaWFXcLLPzt4ithKI4BQzHBMOc/l8UvAeLrj9/hQTw9NhBnxwDibB+IB+ZvdvZ5/PnucAx6Gds5S4rLPw==</diagram></mxfile>
--- a/image_prediction/classifier/init.py
+++ b/image_prediction/classifier/init.py
--- a/image_prediction/classifier/classifier.py
+++ b/image_prediction/classifier/classifier.py
@ -0,0 +1,34 @@
+from typing import List, Union, Tuple
+
+import numpy as np
+from PIL.Image import Image
+from funcy import rcompose
+
+from image_prediction.estimator.adapter.adapter import EstimatorAdapter
+from image_prediction.label_mapper.mapper import LabelMapper
+from image_prediction.utils import get_logger
+
+logger = get_logger()
+
+
+class Classifier:
+    def __init__(self, estimator_adapter: EstimatorAdapter, label_mapper: LabelMapper):
+        """Abstraction layer over different estimator backends (e.g. keras or scikit-learn). For each backend to be used
+        an EstimatorAdapter must be implemented.
+
+        Args:
+            estimator_adapter: adapter for a given estimator backend
+        """
+        self.__estimator_adapter = estimator_adapter
+        self.__label_mapper = label_mapper
+        self.__pipe = rcompose(self.__estimator_adapter, self.__label_mapper)
+
+    def predict(self, batch: Union[np.array, Tuple[Image]]) -> List[str]:
+        if not isinstance(batch, tuple) and batch.shape[0] == 0:
+            return []
+
+        return list(self.__pipe(batch))
+
+    def __call__(self, batch: np.array) -> List[str]:
+        logger.debug("Classifier.predict")
+        return self.predict(batch)
--- a/image_prediction/classifier/image_classifier.py
+++ b/image_prediction/classifier/image_classifier.py
@ -0,0 +1,32 @@
+from itertools import chain
+from typing import Iterable
+
+from PIL.Image import Image
+from funcy import rcompose, chunks
+
+from image_prediction.classifier.classifier import Classifier
+from image_prediction.estimator.preprocessor.preprocessor import Preprocessor
+from image_prediction.estimator.preprocessor.preprocessors.identity import IdentityPreprocessor
+from image_prediction.utils import get_logger
+
+logger = get_logger()
+
+
+class ImageClassifier:
+    """Combines a classifier with a preprocessing pipeline: Receives images, chunks into batches, converts to tensors,
+    applies transformations and finally sends to internal classifier.
+    """
+
+    def __init__(self, classifier: Classifier, preprocessor: Preprocessor = None):
+        self.estimator = classifier
+        self.preprocessor = preprocessor if preprocessor else IdentityPreprocessor()
+        self.pipe = rcompose(self.preprocessor, self.estimator)
+
+    def predict(self, images: Iterable[Image], batch_size=16):
+        batches = chunks(batch_size, images)
+        predictions = chain.from_iterable(map(self.pipe, batches))
+        return predictions
+
+    def __call__(self, images: Iterable[Image], batch_size=16):
+        logger.debug("ImageClassifier.predict")
+        yield from self.predict(images, batch_size=batch_size)
--- a/image_prediction/compositor/init.py
+++ b/image_prediction/compositor/init.py
--- a/image_prediction/compositor/compositor.py
+++ b/image_prediction/compositor/compositor.py
@ -0,0 +1,16 @@
+from funcy import rcompose
+
+from image_prediction.transformer.transformer import Transformer
+from image_prediction.utils import get_logger
+
+logger = get_logger()
+
+
+class TransformerCompositor(Transformer):
+    def __init__(self, formatter: Transformer, *formatters: Transformer):
+        formatters = (formatter, *formatters)
+        self.pipe = rcompose(*formatters)
+
+    def transform(self, obj):
+        logger.debug("TransformerCompositor.transform")
+        return self.pipe(obj)
--- a/image_prediction/config.py
+++ b/image_prediction/config.py
@ -18,12 +18,12 @@ class DotIndexable:
    def __getattr__(self, item):
        return _get_item_and_maybe_make_dotindexable(self.x, item)

-    def __setitem__(self, key, value):
-        self.x[key] = value
-
    def __repr__(self):
        return self.x.__repr__()

+    def __getitem__(self, item):
+        return self.__getattr__(item)
+

 class Config:
    def __init__(self, config_path):
--- a/image_prediction/default_objects.py
+++ b/image_prediction/default_objects.py
@ -0,0 +1,47 @@
+from funcy import juxt
+
+from image_prediction.classifier.classifier import Classifier
+from image_prediction.classifier.image_classifier import ImageClassifier
+from image_prediction.compositor.compositor import TransformerCompositor
+from image_prediction.estimator.adapter.adapter import EstimatorAdapter
+from image_prediction.extractor_classifier.extractor_classifier import ExtractorClassifier
+from image_prediction.formatter.formatters.camel_case import Snake2CamelCaseKeyFormatter
+from image_prediction.formatter.formatters.enum import EnumFormatter
+from image_prediction.transformer.transformers.response import ResponseTransformer
+from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
+from image_prediction.label_mapper.mappers.probability import ProbabilityMapper
+from image_prediction.model_loader.loader import ModelLoader
+from image_prediction.model_loader.loaders.mlflow import MlflowConnector
+from image_prediction.redai_adapter.mlflow import MlflowModelReader
+from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCoordinateTransformer
+
+
+def get_mlflow_model_loader(mlruns_dir):
+    model_loader = ModelLoader(MlflowConnector(MlflowModelReader(mlruns_dir)))
+    return model_loader
+
+
+def get_image_classifier(model_loader, model_identifier):
+    model, classes = juxt(model_loader.load_model, model_loader.load_classes)(model_identifier)
+    return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes)))
+
+
+def get_extractor(**kwargs):
+    image_extractor = ParsablePDFImageExtractor(**kwargs)
+
+    return image_extractor
+
+
+def get_extractor_classifier(model_loader, model_identifier, **kwargs):
+    extractor_classifier = ExtractorClassifier(
+        get_extractor(**kwargs), get_image_classifier(model_loader, model_identifier)
+    )
+
+    return extractor_classifier
+
+
+def get_formatter():
+    formatter = TransformerCompositor(
+        PDFNetCoordinateTransformer(), EnumFormatter(), ResponseTransformer(), Snake2CamelCaseKeyFormatter()
+    )
+    return formatter
--- a/image_prediction/estimator/init.py
+++ b/image_prediction/estimator/init.py
--- a/image_prediction/estimator/adapter/init.py
+++ b/image_prediction/estimator/adapter/init.py
--- a/image_prediction/estimator/adapter/adapter.py
+++ b/image_prediction/estimator/adapter/adapter.py
@ -0,0 +1,15 @@
+from image_prediction.utils import get_logger
+
+logger = get_logger()
+
+
+class EstimatorAdapter:
+    def __init__(self, estimator):
+        self.estimator = estimator
+
+    def predict(self, batch):
+        return self.estimator(batch)
+
+    def __call__(self, batch):
+        logger.debug("EstimatorAdapter.predict")
+        return self.predict(batch)
--- a/image_prediction/estimator/adapter/adapters/init.py
+++ b/image_prediction/estimator/adapter/adapters/init.py
--- a/image_prediction/estimator/preprocessor/init.py
+++ b/image_prediction/estimator/preprocessor/init.py
--- a/image_prediction/estimator/preprocessor/preprocessor.py
+++ b/image_prediction/estimator/preprocessor/preprocessor.py
@ -0,0 +1,10 @@
+import abc
+
+
+class Preprocessor(abc.ABC):
+    @abc.abstractmethod
+    def preprocess(self, batch):
+        raise NotImplementedError
+
+    def __call__(self, batch):
+        return self.preprocess(batch)
--- a/image_prediction/estimator/preprocessor/preprocessors/init.py
+++ b/image_prediction/estimator/preprocessor/preprocessors/init.py
--- a/image_prediction/estimator/preprocessor/preprocessors/basic.py
+++ b/image_prediction/estimator/preprocessor/preprocessors/basic.py
@ -0,0 +1,10 @@
+from image_prediction.estimator.preprocessor.preprocessor import Preprocessor
+from image_prediction.estimator.preprocessor.utils import images_to_batch_tensor
+
+
+class BasicPreprocessor(Preprocessor):
+    """Converts images to tensors"""
+
+    @staticmethod
+    def preprocess(images):
+        return images_to_batch_tensor(images)
--- a/image_prediction/estimator/preprocessor/preprocessors/identity.py
+++ b/image_prediction/estimator/preprocessor/preprocessors/identity.py
@ -0,0 +1,10 @@
+from image_prediction.estimator.preprocessor.preprocessor import Preprocessor
+
+
+class IdentityPreprocessor(Preprocessor):
+    @staticmethod
+    def preprocess(images):
+        return images
+
+    def __call__(self, images):
+        return self.preprocess(images)
--- a/image_prediction/estimator/preprocessor/utils.py
+++ b/image_prediction/estimator/preprocessor/utils.py
@ -0,0 +1,10 @@
+import numpy as np
+from PIL.Image import Image
+
+
+def image_to_normalized_tensor(image: Image) -> np.ndarray:
+    return np.array(image) / 255
+
+
+def images_to_batch_tensor(images) -> np.ndarray:
+    return np.array(list(map(image_to_normalized_tensor, images)))
--- a/image_prediction/exceptions.py
+++ b/image_prediction/exceptions.py
@ -0,0 +1,34 @@
+class UnknownEstimatorAdapter(ValueError):
+    pass
+
+
+class UnknownImageExtractor(ValueError):
+    pass
+
+
+class UnknownModelLoader(ValueError):
+    pass
+
+
+class UnknownDatabaseType(ValueError):
+    pass
+
+
+class UnknownLabelFormat(ValueError):
+    pass
+
+
+class UnexpectedLabelFormat(ValueError):
+    pass
+
+
+class IncorrectInstantiation(RuntimeError):
+    pass
+
+
+class IntentionalTestException(RuntimeError):
+    pass
+
+
+class InvalidBox(Exception):
+    pass
--- a/image_prediction/extraction.py
+++ b/image_prediction/extraction.py
@ -0,0 +1,13 @@
+from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
+
+
+def extract_images_from_pdf(pdf, extractor=None):
+
+    if not extractor:
+        extractor = ParsablePDFImageExtractor()
+
+    try:
+        images_extracted, metadata_extracted = zip(*extractor(pdf))
+        return images_extracted, metadata_extracted
+    except ValueError:
+        return [], []
--- a/image_prediction/extractor_classifier/init.py
+++ b/image_prediction/extractor_classifier/init.py
--- a/image_prediction/extractor_classifier/extractor_classifier.py
+++ b/image_prediction/extractor_classifier/extractor_classifier.py
@ -0,0 +1,32 @@
+from itertools import chain
+from typing import Iterable
+
+from funcy import chunks
+
+from image_prediction.classifier.image_classifier import ImageClassifier
+from image_prediction.image_extractor.extractor import ImageExtractor
+
+
+class ExtractorClassifier:
+    """This class is responsible for orchestrating the pairing of classifications and image metadata. It extracts images
+    from an object and classifies them. Then it ties the classification together with the metadata. It returns an
+    iterable of dictionaries, where each dictionary has a field 'classification' for the prediction and possibly
+    additional fields for metadata -- metadata could be void.
+    """
+
+    def __init__(self, image_extractor: ImageExtractor, image_classifier: ImageClassifier):
+        self.classifier = image_classifier
+        self.extractor = image_extractor
+
+    def __process_batch(self, batch):
+        images, metadata = zip(*batch)
+
+        predictions = self.classifier(images)
+        responses = ({"classification": prd, **mdt} for prd, mdt in zip(predictions, metadata))
+        return responses
+
+    def __call__(self, obj, **kwargs) -> Iterable[dict]:
+        image_metadata_pairs = self.extractor(obj, **kwargs)
+        batches = chunks(16, image_metadata_pairs)
+        predictions = chain.from_iterable(map(self.__process_batch, batches))
+        return predictions
--- a/image_prediction/flask.py
+++ b/image_prediction/flask.py
@ -1,4 +1,5 @@
 import multiprocessing
+import traceback
 from typing import Callable

 from flask import Flask, request, jsonify
@ -8,8 +9,30 @@ from image_prediction.utils import get_logger
 logger = get_logger()


-def make_prediction_server(predict_fn: Callable):
+def run_in_process(func):
+    p = multiprocessing.Process(target=func)
+    p.start()
+    p.join()

+
+def wrap_in_process(func_to_wrap):  # returns a wrapper that calls func_to_wrap in a separate process
+    def build_function_and_run_in_process(*args, **kwargs):
+        def func():
+            try:
+                result = func_to_wrap(*args, **kwargs)
+                return_dict["result"] = result
+            except Exception:  # not bare: let SystemExit/KeyboardInterrupt terminate the sub-process normally
+                logger.error(traceback.format_exc())
+
+        manager = multiprocessing.Manager()
+        return_dict = manager.dict()
+        run_in_process(func)
+        return return_dict.get("result", None)  # None when func_to_wrap raised and stored no result
+
+    return build_function_and_run_in_process
+
+
+def make_prediction_server(predict_fn: Callable):
    app = Flask(__name__)

    @app.route("/ready", methods=["GET"])
@ -24,42 +47,27 @@ def make_prediction_server(predict_fn: Callable):
        resp.status_code = 200
        return resp

-    @app.route("/", methods=["POST"])
+    def __failure():
+        response = jsonify("Analysis failed")
+        response.status_code = 500
+        return response
+
+    @app.route("/predict", methods=["POST"])
    def predict():
-        def predict_fn_wrapper(pdf, return_dict):
-            return_dict["result"] = predict_fn(pdf)

-        def process():
-            # Tensorflow does not free RAM. Workaround is running model in process.
-            # https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
-            pdf = request.data
-            manager = multiprocessing.Manager()
-            return_dict = manager.dict()
-            p = multiprocessing.Process(
-                target=predict_fn_wrapper,
-                args=(
-                    pdf,
-                    return_dict,
-                ),
-            )
-            p.start()
-            p.join()
-            try:
-                return dict(return_dict)["result"]
-            except KeyError:
-                raise
+        # Tensorflow does not free RAM. Workaround: Run prediction function (which instantiates a model) in sub-process.
+        # See: https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
+        predict_fn_wrapped = wrap_in_process(predict_fn)

-        logger.debug("Running predictor on document...")
-        try:
-            predictions = process()
+        logger.info("Analysing...")
+        predictions = predict_fn_wrapped(request.data)
+
+        if predictions is not None:  # None = sub-process stored no result; an empty result is still a success
            response = jsonify(predictions)
            logger.info("Analysis completed.")
            return response
-        except Exception as err:
+        else:
            logger.error("Analysis failed.")
-            logger.exception(err)
-            response = jsonify("Analysis failed.")
-            response.status_code = 500
-            return response
+            return __failure()

    return app
--- a/image_prediction/formatter/init.py
+++ b/image_prediction/formatter/init.py
--- a/image_prediction/formatter/formatter.py
+++ b/image_prediction/formatter/formatter.py
@ -0,0 +1,15 @@
+import abc
+
+from image_prediction.transformer.transformer import Transformer
+
+
+class Formatter(Transformer):
+    @abc.abstractmethod
+    def format(self, obj):
+        raise NotImplementedError
+
+    def transform(self, obj):
+        raise NotImplementedError()
+
+    def __call__(self, obj):
+        return self.format(obj)
--- a/image_prediction/formatter/formatters/init.py
+++ b/image_prediction/formatter/formatters/init.py
--- a/image_prediction/formatter/formatters/camel_case.py
+++ b/image_prediction/formatter/formatters/camel_case.py
@ -0,0 +1,11 @@
+from image_prediction.formatter.formatters.key_formatter import KeyFormatter
+
+
+class Snake2CamelCaseKeyFormatter(KeyFormatter):
+    def format_key(self, key):
+
+        if isinstance(key, str):
+            head, *tail = key.split("_")
+            return head + "".join(map(str.title, tail))
+        else:
+            return key
--- a/image_prediction/formatter/formatters/enum.py
+++ b/image_prediction/formatter/formatters/enum.py
@ -0,0 +1,23 @@
+from enum import Enum
+
+from image_prediction.formatter.formatters.key_formatter import KeyFormatter
+
+
+class EnumFormatter(KeyFormatter):
+    def format_key(self, key):
+        return key.value if isinstance(key, Enum) else key
+
+    def transform(self, obj):
+        raise NotImplementedError
+
+
+class ReverseEnumFormatter(KeyFormatter):
+    def __init__(self, enum):
+        self.enum = enum
+        self.reverse_enum = {e.value: e for e in enum}
+
+    def format_key(self, key):
+        return self.reverse_enum.get(key, key)
+
+    def transform(self, obj):
+        raise NotImplementedError
--- a/image_prediction/formatter/formatters/identity.py
+++ b/image_prediction/formatter/formatters/identity.py
@ -0,0 +1,6 @@
+from image_prediction.formatter.formatter import Formatter
+
+
+class IdentityFormatter(Formatter):
+    def format(self, obj):
+        return obj
--- a/image_prediction/formatter/formatters/key_formatter.py
+++ b/image_prediction/formatter/formatters/key_formatter.py
@ -0,0 +1,28 @@
+import abc
+from typing import Iterable
+
+from image_prediction.formatter.formatter import Formatter
+
+
+class KeyFormatter(Formatter):
+    @abc.abstractmethod
+    def format_key(self, key):
+        raise NotImplementedError
+
+    def __format(self, data):
+
+        # If we wanted to do this properly, we would need handlers for all expected types and dispatch based
+        # on a type comparison. This is too much engineering for the limited use-case of this class though.
+        if isinstance(data, Iterable) and not isinstance(data, dict) and not isinstance(data, str):
+            f = map(self.__format, data)
+            return type(data)(f) if not isinstance(data, map) else f
+
+        if not isinstance(data, dict):
+            return data
+
+        keys_formatted = list(map(self.format_key, data))
+
+        return dict(zip(keys_formatted, map(self.__format, data.values())))
+
+    def format(self, data):
+        return self.__format(data)
--- a/image_prediction/image_extractor/init.py
+++ b/image_prediction/image_extractor/init.py
--- a/image_prediction/image_extractor/extractor.py
+++ b/image_prediction/image_extractor/extractor.py
@ -0,0 +1,19 @@
+import abc
+from collections import namedtuple
+from typing import Iterable
+
+from image_prediction.utils import get_logger
+
+ImageMetadataPair = namedtuple("ImageMetadataPair", ["image", "metadata"])
+
+logger = get_logger()
+
+
+class ImageExtractor(abc.ABC):
+    @abc.abstractmethod
+    def extract(self, obj) -> Iterable[ImageMetadataPair]:
+        raise NotImplementedError
+
+    def __call__(self, obj, **kwargs):
+        logger.debug("ImageExtractor.extract")
+        return self.extract(obj, **kwargs)
--- a/image_prediction/image_extractor/extractors/init.py
+++ b/image_prediction/image_extractor/extractors/init.py
--- a/image_prediction/image_extractor/extractors/mock.py
+++ b/image_prediction/image_extractor/extractors/mock.py
@ -0,0 +1,7 @@
+from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
+
+
+class ImageExtractorMock(ImageExtractor):
+    def extract(self, image_container):
+        for i, image in enumerate(image_container):
+            yield ImageMetadataPair(image, {"image_id": i})
--- a/image_prediction/image_extractor/extractors/parsable.py
+++ b/image_prediction/image_extractor/extractors/parsable.py
@ -0,0 +1,181 @@
+import atexit
+import io
+from functools import partial, lru_cache
+from itertools import chain, starmap, filterfalse, repeat
+from operator import itemgetter
+from typing import List
+
+import fitz
+from PIL import Image
+from funcy import rcompose, merge, zipdict
+from tqdm import tqdm
+
+from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
+from image_prediction.info import Info
+from image_prediction.stitching.stitching import stitch_pairs
+from image_prediction.stitching.utils import validate_box_coords, validate_box_size
+
+
+class ParsablePDFImageExtractor(ImageExtractor):
+    def __init__(self, verbose=False, tolerance=0):
+        """
+
+        Args:
+            verbose: Whether to show progressbar
+            tolerance: Maximum distance in pixels between images for them to still be stitched together
+        """
+        self.doc: fitz.fitz.Document = None
+        self.verbose = verbose
+        self.tolerance = tolerance
+
+    def extract(self, pdf: bytes, page_range: range = None):
+        self.doc = fitz.Document(stream=pdf)
+
+        pages = extract_pages(self.doc, page_range) if page_range else self.doc
+
+        image_metadata_pairs = chain.from_iterable(
+            map(
+                self.__process_images_on_page,
+                tqdm(pages, desc="Extracting", disable=not self.verbose, total=len(page_range) if page_range else None),
+            )
+        )
+
+        yield from image_metadata_pairs
+
+    def __process_images_on_page(self, page: fitz.fitz.Page):
+        images = get_images_on_page(self.doc, page)
+        metadata = get_metadata_for_images_on_page(self.doc, page)
+        clear_caches()
+
+        image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata)))
+        image_metadata_pairs = stitch_pairs(list(image_metadata_pairs), tolerance=self.tolerance)
+
+        yield from image_metadata_pairs
+
+
+def extract_pages(doc, page_range):
+    page_range = range(page_range.start + 1, page_range.stop + 1)
+    pages = map(doc.load_page, page_range)
+
+    return pages
+
+
+@lru_cache(maxsize=None)
+def get_images_on_page(doc, page: fitz.Page):
+    image_infos = get_image_infos(page)
+    xrefs = map(itemgetter("xref"), image_infos)
+    images = map(partial(xref_to_image, doc), xrefs)
+
+    return images
+
+
+def get_metadata_for_images_on_page(doc, page: fitz.Page):
+
+    metadata = map(get_image_metadata, get_image_infos(page))
+    metadata = validate_coords_and_passthrough(metadata)
+
+    metadata = filter_out_tiny_images(metadata)
+    metadata = validate_size_and_passthrough(metadata)
+
+    metadata = add_page_metadata(page, metadata)
+
+    metadata = add_alpha_channel_info(doc, page, metadata)
+
+    yield from metadata
+
+
+@lru_cache(maxsize=None)
+def get_image_infos(page: fitz.Page) -> List[dict]:
+    return page.get_image_info(xrefs=True)
+
+
+@lru_cache(maxsize=None)
+def xref_to_image(doc, xref) -> Image:
+    maybe_image = load_image_handle_from_xref(doc, xref)
+    return Image.open(io.BytesIO(maybe_image["image"])) if maybe_image else None
+
+
+def get_image_metadata(image_info):
+
+    x1, y1, x2, y2 = map(rounder, image_info["bbox"])
+
+    width = abs(x2 - x1)
+    height = abs(y2 - y1)
+
+    return {
+        Info.WIDTH: width,
+        Info.HEIGHT: height,
+        Info.X1: x1,
+        Info.X2: x2,
+        Info.Y1: y1,
+        Info.Y2: y2,
+    }
+
+
+def validate_coords_and_passthrough(metadata):
+    yield from map(validate_box_coords, metadata)
+
+
+def filter_out_tiny_images(metadata):
+    return filterfalse(tiny, metadata)
+
+
+def validate_size_and_passthrough(metadata):
+    yield from map(validate_box_size, metadata)
+
+
+def add_page_metadata(page, metadata):
+    return map(partial(merge, get_page_metadata(page)), metadata)
+
+
+def add_alpha_channel_info(doc, page, metadata):
+    xrefs = map(itemgetter("xref"), get_image_infos(page))
+    alpha = map(partial(has_alpha_channel, doc), xrefs)
+    alpha = ({Info.ALPHA: a} for a in alpha)
+    # alpha = map(dict, zip(repeat(Info.ALPHA), alpha))
+    metadata = starmap(merge, zip(alpha, metadata))
+
+    return metadata
+
+
+@lru_cache(maxsize=None)
+def load_image_handle_from_xref(doc, xref):
+    return doc.extract_image(xref)
+
+
+rounder = rcompose(round, int)
+
+
+def get_page_metadata(page):
+    page_width, page_height = map(rounder, page.mediabox_size)
+
+    return {
+        Info.PAGE_WIDTH: page_width,
+        Info.PAGE_HEIGHT: page_height,
+        Info.PAGE_IDX: page.number,
+    }
+
+
+def has_alpha_channel(doc, xref):
+
+    maybe_image = load_image_handle_from_xref(doc, xref)
+    maybe_smask = maybe_image["smask"] if maybe_image else None
+
+    if maybe_smask:
+        return any([doc.extract_image(maybe_smask) is not None, bool(fitz.Pixmap(doc, maybe_smask).alpha)])
+    else:
+        return bool(fitz.Pixmap(doc, xref).alpha)
+
+
+def tiny(metadata):
+    return metadata[Info.WIDTH] * metadata[Info.HEIGHT] <= 4
+
+
+def clear_caches():
+    get_image_infos.cache_clear()
+    load_image_handle_from_xref.cache_clear()
+    get_images_on_page.cache_clear()
+    xref_to_image.cache_clear()
+
+
+atexit.register(clear_caches)
--- a/image_prediction/info.py
+++ b/image_prediction/info.py
@ -0,0 +1,14 @@
+from enum import Enum
+
+
+class Info(Enum):
+    PAGE_WIDTH = "page_width"
+    PAGE_HEIGHT = "page_height"
+    PAGE_IDX = "page_idx"
+    WIDTH = "width"
+    HEIGHT = "height"
+    X1 = "x1"
+    X2 = "x2"
+    Y1 = "y1"
+    Y2 = "y2"
+    ALPHA = "alpha"
--- a/image_prediction/label_mapper/init.py
+++ b/image_prediction/label_mapper/init.py
--- a/image_prediction/label_mapper/mapper.py
+++ b/image_prediction/label_mapper/mapper.py
@ -0,0 +1,10 @@
+import abc
+
+
+class LabelMapper(abc.ABC):
+    @abc.abstractmethod
+    def map_labels(self, items):
+        raise NotImplementedError
+
+    def __call__(self, items):
+        return self.map_labels(items)
--- a/image_prediction/label_mapper/mappers/init.py
+++ b/image_prediction/label_mapper/mappers/init.py
--- a/image_prediction/label_mapper/mappers/numeric.py
+++ b/image_prediction/label_mapper/mappers/numeric.py
@ -0,0 +1,20 @@
+from typing import Mapping, Iterable
+
+from image_prediction.exceptions import UnexpectedLabelFormat
+from image_prediction.label_mapper.mapper import LabelMapper
+
+
+class IndexMapper(LabelMapper):
+    def __init__(self, labels: Mapping[int, str]):
+        self.__labels = labels
+
+    def __validate_index_label_format(self, index_label: int) -> None:
+        if not 0 <= index_label < len(self.__labels):
+            raise UnexpectedLabelFormat(f"Received index label  '{index_label}' that has no associated string label.")
+
+    def __map_label(self, index_label: int) -> str:
+        self.__validate_index_label_format(index_label)
+        return self.__labels[index_label]
+
+    def map_labels(self, index_labels: Iterable[int]) -> Iterable[str]:
+        return map(self.__map_label, index_labels)
--- a/image_prediction/label_mapper/mappers/probability.py
+++ b/image_prediction/label_mapper/mappers/probability.py
@ -0,0 +1,39 @@
+from enum import Enum
+from operator import itemgetter
+from typing import Mapping, Iterable
+
+import numpy as np
+from funcy import rcompose, rpartial
+
+from image_prediction.exceptions import UnexpectedLabelFormat
+from image_prediction.label_mapper.mapper import LabelMapper
+
+
+class ProbabilityMapperKeys(Enum):
+    """Keys of the dictionaries produced by `ProbabilityMapper`."""
+
+    LABEL = "label"
+    PROBABILITIES = "probabilities"
+
+
+class ProbabilityMapper(LabelMapper):
+    """Maps arrays of class probabilities to the most likely label plus a label-to-probability mapping."""
+
+    def __init__(self, labels: Mapping[int, str]):
+        self.__labels = labels
+        # String conversion in the middle due to floating point precision issues.
+        # See: https://stackoverflow.com/questions/56820/round-doesnt-seem-to-be-rounding-properly
+        self.__rounder = rcompose(rpartial(round, 4), str, float)
+
+    def __validate_array_label_format(self, probabilities: np.ndarray) -> None:
+        """Raises UnexpectedLabelFormat unless there is exactly one probability per label."""
+        if len(probabilities) != len(self.__labels):
+            # Too many values are rejected as well as too few, so the message states both counts neutrally.
+            raise UnexpectedLabelFormat(
+                f"Received {len(probabilities)} probabilities, but {len(self.__labels)} labels were passed."
+            )
+
+    def __map_array(self, probabilities: np.ndarray) -> dict:
+        """Pairs the labels with the rounded probabilities, ordered by descending probability.
+
+        Returns:
+            Dictionary holding the most likely label under ProbabilityMapperKeys.LABEL and the ordered
+            label-to-probability mapping under ProbabilityMapperKeys.PROBABILITIES.
+
+        Raises:
+            UnexpectedLabelFormat: If the number of probabilities differs from the number of labels.
+        """
+        self.__validate_array_label_format(probabilities)
+        rounded = map(self.__rounder, probabilities)
+        cls2prob = dict(sorted(zip(self.__labels, rounded), key=itemgetter(1), reverse=True))
+        # Dictionaries preserve insertion order, so the first key carries the highest probability.
+        most_likely = [*cls2prob][0]
+        return {ProbabilityMapperKeys.LABEL: most_likely, ProbabilityMapperKeys.PROBABILITIES: cls2prob}
+
+    def map_labels(self, probabilities: Iterable[np.ndarray]) -> Iterable[dict]:
+        """Returns a lazy `map` that yields one result dictionary per probability array."""
+        return map(self.__map_array, probabilities)
--- a/image_prediction/locations.py
+++ b/image_prediction/locations.py
@ -1,10 +1,17 @@
-from os import path
+"""Defines constant paths relative to the module root path."""

-MODULE_DIR = path.dirname(path.abspath(__file__))
-PACKAGE_ROOT_DIR = path.dirname(MODULE_DIR)
+from pathlib import Path

-CONFIG_FILE = path.join(PACKAGE_ROOT_DIR, "config.yaml")
+MODULE_DIR = Path(__file__).resolve().parents[0]

-DATA_DIR = path.join(PACKAGE_ROOT_DIR, "data")
-MLRUNS_DIR = path.join(DATA_DIR, "mlruns")
-BASE_WEIGHTS = path.join(DATA_DIR, "base_weights.h5")
+# Parent directory of MODULE_DIR; all paths below are anchored here.
+PACKAGE_ROOT_DIR = MODULE_DIR.parents[0]
+
+CONFIG_FILE = PACKAGE_ROOT_DIR / "config.yaml"
+
+BANNER_FILE = PACKAGE_ROOT_DIR / "banner.txt"
+
+DATA_DIR = PACKAGE_ROOT_DIR / "data"
+
+# Unlike the other constants this one is a plain string, not a Path.
+# NOTE(review): presumably because the consumer expects a string - confirm.
+MLRUNS_DIR = str(DATA_DIR / "mlruns")
+
+TEST_DATA_DIR = PACKAGE_ROOT_DIR / "test" / "data"
--- a/image_prediction/model_loader/init.py
+++ b/image_prediction/model_loader/init.py
--- a/image_prediction/model_loader/database/init.py
+++ b/image_prediction/model_loader/database/init.py
--- a/image_prediction/model_loader/database/connector.py
+++ b/image_prediction/model_loader/database/connector.py
@ -0,0 +1,7 @@
+import abc
+
+
+class DatabaseConnector(abc.ABC):
+    """Interface for retrieving stored objects by identifier."""
+
+    @abc.abstractmethod
+    def get_object(self, identifier):
+        """Returns the object stored under `identifier`; must be implemented by subclasses."""
+        raise NotImplementedError
--- a/image_prediction/model_loader/database/connectors/init.py
+++ b/image_prediction/model_loader/database/connectors/init.py
--- a/image_prediction/model_loader/database/connectors/mock.py
+++ b/image_prediction/model_loader/database/connectors/mock.py
@ -0,0 +1,9 @@
+from image_prediction.model_loader.database.connector import DatabaseConnector
+
+
+class DatabaseConnectorMock(DatabaseConnector):
+    """Connector backed by an in-memory dictionary."""
+
+    def __init__(self, store: dict):
+        self.store = store
+
+    def get_object(self, identifier):
+        # Plain dictionary lookup: an unknown identifier raises KeyError.
+        return self.store[identifier]
--- a/image_prediction/model_loader/loader.py
+++ b/image_prediction/model_loader/loader.py
@ -0,0 +1,18 @@
+from functools import lru_cache
+
+from image_prediction.model_loader.database.connector import DatabaseConnector
+
+
+class ModelLoader:
+    """Loads models and their class labels through a database connector, fetching each object only once."""
+
+    def __init__(self, database_connector: DatabaseConnector):
+        self.database_connector = database_connector
+        # Per-instance cache. A functools.lru_cache on the method would key on `self` and keep every loader,
+        # together with all objects it fetched, alive in a class-level cache.
+        self.__objects = {}
+
+    def __get_object(self, identifier):
+        """Returns the object stored under `identifier`, querying the connector on first access only."""
+        if identifier not in self.__objects:
+            self.__objects[identifier] = self.database_connector.get_object(identifier)
+        return self.__objects[identifier]
+
+    def load_model(self, identifier):
+        """Returns the "model" entry of the object stored under `identifier`."""
+        return self.__get_object(identifier)["model"]
+
+    def load_classes(self, identifier):
+        """Returns the "classes" entry of the object stored under `identifier`."""
+        return self.__get_object(identifier)["classes"]
--- a/image_prediction/model_loader/loaders/init.py
+++ b/image_prediction/model_loader/loaders/init.py
--- a/image_prediction/model_loader/loaders/mlflow.py
+++ b/image_prediction/model_loader/loaders/mlflow.py
@ -0,0 +1,10 @@
+from image_prediction.model_loader.database.connector import DatabaseConnector
+from image_prediction.redai_adapter.mlflow import MlflowModelReader
+
+
+class MlflowConnector(DatabaseConnector):
+    """Connector that retrieves objects from an `MlflowModelReader` by run id."""
+
+    def __init__(self, mlflow_reader: MlflowModelReader):
+        self.mlflow_reader = mlflow_reader
+
+    def get_object(self, run_id):
+        # Delegates to the reader's item access (`reader[run_id]`).
+        return self.mlflow_reader[run_id]
--- a/image_prediction/pipeline.py
+++ b/image_prediction/pipeline.py
@ -0,0 +1,26 @@
+import os
+
+from funcy import rcompose
+
+from image_prediction.config import CONFIG
+from image_prediction.default_objects import get_extractor_classifier, get_formatter, get_mlflow_model_loader
+from image_prediction.locations import MLRUNS_DIR
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+
+def load_pipeline(**kwargs):
+    """Builds the Pipeline for the run id configured under `CONFIG.service.run_id`.
+
+    The model loader is created for MLRUNS_DIR; keyword arguments are forwarded unchanged to the Pipeline
+    constructor.
+    """
+    return Pipeline(get_mlflow_model_loader(MLRUNS_DIR), CONFIG.service.run_id, **kwargs)
+
+
+class Pipeline:
+    """Chains the extractor-classifier and the formatter into a single callable."""
+
+    def __init__(self, model_loader, model_identifier, **kwargs):
+        # rcompose applies left to right: the extractor-classifier runs first, its output is fed to the formatter.
+        self.pipe = rcompose(get_extractor_classifier(model_loader, model_identifier, **kwargs), get_formatter())
+
+    def __call__(self, pdf: bytes, page_range: range = None):
+        # Generator function: nothing is computed until the caller starts iterating.
+        yield from self.pipe(pdf, page_range=page_range)
--- a/image_prediction/predictor.py
+++ b/image_prediction/predictor.py
@ -1,122 +0,0 @@
-from itertools import chain
-from operator import itemgetter
-from typing import List, Dict, Iterable
-
-import numpy as np
-
-from image_prediction.config import CONFIG
-from image_prediction.locations import MLRUNS_DIR, BASE_WEIGHTS
-from image_prediction.utils import temporary_pdf_file, get_logger
-from incl.redai_image.redai.redai.backend.model.model_handle import ModelHandle
-from incl.redai_image.redai.redai.backend.pdf.image_extraction import extract_and_stitch
-from incl.redai_image.redai.redai.utils.mlflow_reader import MlflowModelReader
-from incl.redai_image.redai.redai.utils.shared import chunk_iterable
-
-logger = get_logger()
-
-
-class Predictor:
-    """`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
-    interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
-    """
-
-    def __init__(self, model_handle: ModelHandle = None):
-        """Initializes a ServiceEstimator.
-
-        Args:
-            model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
-                mlflow database via CONFIG.service.run_id.
-        """
-        try:
-            if model_handle is None:
-                reader = MlflowModelReader(run_id=CONFIG.service.run_id, mlruns_dir=MLRUNS_DIR)
-                self.model_handle = reader.get_model_handle(BASE_WEIGHTS)
-            else:
-                self.model_handle = model_handle
-
-            self.classes = self.model_handle.model.classes_
-            self.classes_readable = np.array(self.model_handle.classes)
-            self.classes_readable_aligned = self.classes_readable[self.classes[list(range(len(self.classes)))]]
-        except Exception as e:
-            logger.info(f"Service estimator initialization failed: {e}")
-
-    def __make_predictions_human_readable(self, probs: np.ndarray) -> List[Dict[str, float]]:
-        """Translates an n x m matrix of probabilities over classes into an n-element list of mappings from classes to
-        probabilities.
-
-        Args:
-            probs: probability matrix (items x classes)
-
-        Returns:
-            list of mappings from classes to probabilities.
-        """
-        classes = np.argmax(probs, axis=1)
-        classes = self.classes[classes]
-        classes_readable = [self.model_handle.classes[c] for c in classes]
-        return classes_readable
-
-    def predict(self, images: List, probabilities: bool = False, **kwargs):
-        """Gathers predictions for list of images. Assigns each image a class and optionally a probability distribution
-        over all classes.
-
-        Args:
-            images (List[PIL.Image]) : Images to gather predictions for.
-            probabilities: Whether to return dictionaries of the following form instead of strings:
-                {
-                    "class": predicted class,
-                    "probabilities": {
-                        "class 1" : class 1 probability,
-                        "class 2" : class 2 probability,
-                         ...
-                    }
-                }
-
-        Returns:
-            By default the return value is a list of classes (meaningful class name strings). Alternatively a list of
-            dictionaries with an additional probability field for estimated class probabilities per image can be
-            returned.
-        """
-        X = self.model_handle.prep_images(list(images))
-
-        probs_per_item = self.model_handle.model.predict_proba(X, **kwargs).astype(float)
-        classes = self.__make_predictions_human_readable(probs_per_item)
-
-        class2prob_per_item = [dict(zip(self.classes_readable_aligned, probs)) for probs in probs_per_item]
-        class2prob_per_item = [
-            dict(sorted(c2p.items(), key=itemgetter(1), reverse=True)) for c2p in class2prob_per_item
-        ]
-
-        predictions = [{"class": c, "probabilities": c2p} for c, c2p in zip(classes, class2prob_per_item)]
-
-        return predictions if probabilities else classes
-
-    def predict_pdf(self, pdf, verbose=False):
-        with temporary_pdf_file(pdf) as pdf_path:
-            image_metadata_pairs = self.__extract_image_metadata_pairs(pdf_path, verbose=verbose)
-            return self.__predict_images(image_metadata_pairs)
-
-    def __predict_images(self, image_metadata_pairs: Iterable, batch_size: int = CONFIG.service.batch_size):
-        def process_chunk(chunk):
-            images, metadata = zip(*chunk)
-            predictions = self.predict(images, probabilities=True)
-            return predictions, metadata
-
-        def predict(image_metadata_pair_generator):
-            chunks = chunk_iterable(image_metadata_pair_generator, n=batch_size)
-            return map(chain.from_iterable, zip(*map(process_chunk, chunks)))
-
-        try:
-            predictions, metadata = predict(image_metadata_pairs)
-            return predictions, metadata
-
-        except ValueError:
-            return [], []
-
-    @staticmethod
-    def __extract_image_metadata_pairs(pdf_path: str, **kwargs):
-        def image_is_large_enough(metadata: dict):
-            x1, x2, y1, y2 = itemgetter("x1", "x2", "y1", "y2")(metadata)
-
-            return abs(x1 - x2) > 2 and abs(y1 - y2) > 2
-
-        yield from extract_and_stitch(pdf_path, convert_to_rgb=True, filter_fn=image_is_large_enough, **kwargs)
--- a/image_prediction/redai_adapter/init.py
+++ b/image_prediction/redai_adapter/init.py
--- a/image_prediction/redai_adapter/efficient_net_wrapper.py
+++ b/image_prediction/redai_adapter/efficient_net_wrapper.py
@ -0,0 +1,45 @@
+import tensorflow as tf
+
+from image_prediction.redai_adapter.model_wrapper import ModelWrapper
+
+
+class EfficientNetWrapper(ModelWrapper):
+    """ModelWrapper built on a non-trainable EfficientNetB0 base followed by a dense classification head."""
+
+    def __init__(self, classes, base_weights_path=None, weights_path=None):
+        # Must be set before calling the base constructor, which builds the model and thereby reads `input_shape`.
+        self.__input_shape = (224, 224, 3)
+        super().__init__(classes=classes, base_weights_path=base_weights_path, weights_path=weights_path)
+
+    @property
+    def input_shape(self):
+        return self.__input_shape
+
+    # The two methods below spell out the name-mangled form (`_ModelWrapper__...`) so that they override the
+    # double-underscore abstract methods declared on ModelWrapper.
+    def _ModelWrapper__preprocess_tensor(self, tensor):
+        return tf.keras.applications.efficientnet.preprocess_input(tensor)
+
+    def _ModelWrapper__build(self, base_weights=None) -> tf.keras.models.Model:
+        input_img = tf.keras.layers.Input(shape=self.input_shape)
+
+        pretrained = tf.keras.applications.efficientnet.EfficientNetB0(
+            include_top=False, input_tensor=tf.keras.layers.Input(shape=self.input_shape), weights=base_weights
+        )
+
+        # Freeze the base network as a whole and each of its layers individually.
+        pretrained.trainable = False
+
+        for layer in pretrained.layers:
+            layer.trainable = False
+
+        pretrained = pretrained(input_img)
+
+        # Classification head: flattened features, three dense/dropout stages, softmax over the classes.
+        finetuned = tf.keras.layers.Flatten()(pretrained)
+        finetuned = tf.keras.layers.Dense(512, activation="relu")(finetuned)
+        finetuned = tf.keras.layers.Dropout(0.2)(finetuned)
+        finetuned = tf.keras.layers.Dense(128, activation="relu")(finetuned)
+        finetuned = tf.keras.layers.Dropout(0.2)(finetuned)
+        finetuned = tf.keras.layers.Dense(32, activation="relu")(finetuned)
+        finetuned = tf.keras.layers.Dropout(0.2)(finetuned)
+        finetuned = tf.keras.layers.Dense(len(self.classes), activation="softmax")(finetuned)
+
+        model = tf.keras.models.Model(inputs=input_img, outputs=finetuned)
+
+        model.compile()
+
+        return model
--- a/image_prediction/redai_adapter/mlflow.py
+++ b/image_prediction/redai_adapter/mlflow.py
@ -0,0 +1,72 @@
+import importlib
+import json
+import os
+from functools import lru_cache
+
+import mlflow
+
+from image_prediction.redai_adapter.model import PredictionModelHandle
+
+
+class MlflowModelReader:
+    """Reads prediction models and their class labels from mlflow runs.
+
+    Item access (`reader[run_id]`) returns a dictionary with the keys "model" and "classes".
+    """
+
+    def __init__(self, mlruns_dir=None):
+        self.mlruns_dir = mlruns_dir
+        # Runs are cached per instance. A functools.lru_cache on the method would key on `self` and thereby keep
+        # every reader alive in a class-level cache.
+        self.__runs = {}
+        mlflow.set_tracking_uri(self.mlruns_dir)
+
+    @staticmethod
+    def __correct_artifact_uri(run_artifact_uri, base_path):
+        """Re-roots the part of the artifact URI that follows "mlruns/" under `base_path`."""
+        # The unpacking requires "mlruns/" to occur exactly once in the URI; otherwise a ValueError is raised.
+        _, suffix = run_artifact_uri.split("mlruns/")
+        return os.path.join(base_path, suffix)
+
+    def __get_weights_path(self, run_id, prefix="tt"):
+        """Returns the paths of the base weights and the trained weights of the given run."""
+        run = self.__get_run(run_id)
+
+        artifact_uri = self.__correct_artifact_uri(run.info.to_proto().artifact_uri, self.mlruns_dir)
+        path = os.path.join(artifact_uri, prefix, "train_dev", "estimator")
+
+        base_path = os.path.join(path, "base_weights.h5")
+        weights_path = os.path.join(path, "weights.h5")
+
+        return base_path, weights_path
+
+    def __get_run(self, run_id):
+        """Returns the mlflow run for `run_id`, querying mlflow on first access only."""
+        if run_id not in self.__runs:
+            self.__runs[run_id] = mlflow.get_run(run_id)
+        return self.__runs[run_id]
+
+    def __get_classes(self, run_id, prefix="tt"):
+        """Parses the class list that the run stores as a parameter."""
+        run = self.__get_run(run_id)
+
+        # Single quotes are swapped for double quotes so that the stored value can be parsed as JSON.
+        classes = json.loads(run.data.params[os.path.join(prefix, "train_dev/estimator/classes")].replace("'", '"'))
+
+        return classes
+
+    def __get_model_handle(self, run_id):
+        """Instantiates the model handle named by the run's "model_handle_builder" parameter."""
+        run = self.__get_run(run_id)
+
+        model_handle_builder = load_object(run.data.params["model_handle_builder"].strip())
+
+        base_weights_path, weights_path = self.__get_weights_path(run_id)
+
+        model_handle = model_handle_builder(
+            self.__get_classes(run_id), base_weights_path=base_weights_path, weights_path=weights_path
+        )
+
+        return model_handle
+
+    def __get_model(self, run_id) -> PredictionModelHandle:
+        """Wraps the run's model handle in a PredictionModelHandle."""
+        model_handle = self.__get_model_handle(run_id)
+        model = PredictionModelHandle(model_handle)
+        return model
+
+    def __getitem__(self, run_id):
+        return {"model": self.__get_model(run_id), "classes": self.__get_classes(run_id)}
+
+
+def load_object(object_path):
+    """Imports and returns the object named by a dotted path such as "package.module.name".
+
+    Everything before the last dot is imported as a module; the remainder is looked up as an attribute of it.
+    """
+    module_path, _, object_name = object_path.rpartition(".")
+    return getattr(importlib.import_module(module_path), object_name)
--- a/image_prediction/redai_adapter/model.py
+++ b/image_prediction/redai_adapter/model.py
@ -0,0 +1,19 @@
+from funcy import rcompose
+
+from image_prediction.utils import get_logger
+
+logger = get_logger()
+
+
+class PredictionModelHandle:
+    """Simplifies usage of ModelHandle instances for prediction purposes."""
+
+    def __init__(self, model_handle):
+        # rcompose applies left to right: inputs are first prepared by `prep_images`, then passed to `model.predict`.
+        self.__predict = rcompose(model_handle.prep_images, model_handle.model.predict)
+
+    def predict(self, *args, **kwargs):
+        """Prepares the given images and returns the wrapped model's prediction for them."""
+        return self.__predict(*args, **kwargs)
+
+    def __call__(self, *args, **kwargs):
+        """Logs a debug message and delegates to `predict`."""
+        logger.debug("PredictionModelHandle.predict")
+        return self.predict(*args, **kwargs)
--- a/image_prediction/redai_adapter/model_wrapper.py
+++ b/image_prediction/redai_adapter/model_wrapper.py
@ -0,0 +1,42 @@
+import abc
+
+import numpy as np
+import tensorflow as tf
+
+
+class ModelWrapper(abc.ABC):
+    """Base class that builds a model, loads its weights and prepares images for prediction."""
+
+    def __init__(self, classes, base_weights_path=None, weights_path=None):
+        self.__classes = classes
+        # Inside the class `__build` resolves to `_ModelWrapper__build`; subclasses provide it under that name.
+        self.model = self.__build(base_weights_path)
+        # NOTE(review): `weights_path` defaults to None but is passed on unconditionally - confirm that callers
+        # always supply it.
+        self.model.load_weights(weights_path)
+
+    @property
+    @abc.abstractmethod
+    def input_shape(self):
+        """Input shape of the model; all but the last entry are used as the image resize target."""
+        raise NotImplementedError
+
+    @property
+    def classes(self):
+        return self.__classes
+
+    @abc.abstractmethod
+    def __preprocess_tensor(self, tensor):
+        """Model-specific preprocessing of the stacked image tensor."""
+        raise NotImplementedError
+
+    @staticmethod
+    def __images_to_tensor(images):
+        # Converts every image to an array and stacks the results into a single numpy array.
+        return np.array(list(map(tf.keras.preprocessing.image.img_to_array, images)))
+
+    def __resize_and_convert(self, image):
+        return image.resize(self.input_shape[:-1]).convert("RGB")
+
+    def prep_images(self, images):
+        """Resizes the images, converts them to RGB, stacks them and applies the model-specific preprocessing."""
+        images = map(self.__resize_and_convert, images)
+        tensor = self.__images_to_tensor(images)
+        tensor = self.__preprocess_tensor(tensor)
+
+        return tensor
+
+    @abc.abstractmethod
+    def __build(self, base_weights=None) -> tf.keras.models.Model:
+        """Builds and returns the model; called once by the constructor."""
+        raise NotImplementedError
--- a/image_prediction/stitching/init.py
+++ b/image_prediction/stitching/init.py
--- a/image_prediction/stitching/grouping.py
+++ b/image_prediction/stitching/grouping.py
@ -0,0 +1,63 @@
+from functools import lru_cache
+from itertools import groupby
+
+import numpy as np
+from funcy import compose, second
+
+from image_prediction.stitching.utils import make_coord_getter
+
+
+class CoordGrouper:
+    """Groups pairs by a coordinate on the axis orthogonal to `axis`."""
+
+    def __init__(self, axis, tolerance=0):
+        # Both getters read from the *other* axis: for axis "x" they read "y1" and "y2", and vice versa.
+        self.c1_getter = make_coord_getter(f"{other_axis(axis)}1")
+        self.c2_getter = make_coord_getter(f"{other_axis(axis)}2")
+        self.tolerance = tolerance
+
+    def group_pairs_by_lesser_coordinate(self, pairs):
+        """Groups the pairs by their "1" coordinate on the orthogonal axis."""
+        return group_by_coordinate(pairs, self.c1_getter, self.tolerance)
+
+    def group_pairs_by_greater_coordinate(self, pairs):
+        """Groups the pairs by their "2" coordinate on the orthogonal axis."""
+        return group_by_coordinate(pairs, self.c2_getter, self.tolerance)
+
+
+def other_axis(axis):
+    """Returns "y" for "x"; every other input yields "x"."""
+    if axis == "x":
+        return "y"
+    return "x"
+
+
+def fuzzify(func, tolerance):
+    """Wraps `func` so that results within `tolerance` of an earlier result are replaced by that earlier result.
+
+    The returned function is stateful: every value that fits no known interval is remembered together with the
+    interval [value - tolerance, value + tolerance] around it.
+    """
+    def inner(item):
+        nonlocal mid_points
+        nonlocal lower_bounds
+        nonlocal upper_bounds
+
+        value = func(item)
+        # Boolean mask over the known intervals that contain `value`.
+        fits = (array(lower_bounds_array()) <= value) & (value <= array(upper_bounds_array()))
+        if any(fits):
+            # argmax on a boolean mask yields the index of the first matching interval.
+            return mid_points[np.argmax(fits)]
+        else:
+            # Unseen value: register it as the mid point of a new interval and return it unchanged.
+            mid_points = [*mid_points, value]
+            lower_bounds = [*lower_bounds, value - tolerance]
+            upper_bounds = [*upper_bounds, value + tolerance]
+            return value
+
+    # The bounds are handed over as tuples because the cached `array` helper needs hashable arguments.
+    def lower_bounds_array():
+        return tuple(lower_bounds)
+
+    def upper_bounds_array():
+        return tuple(upper_bounds)
+
+    @lru_cache(maxsize=None)
+    def array(tpl):
+        return np.array(tpl)
+
+    lower_bounds = []
+    upper_bounds = []
+    mid_points = []
+
+    return inner
+
+
+def group_by_coordinate(pairs, coord_getter, tolerance=0):
+    """Sorts the pairs by their fuzzified coordinate and returns a lazy iterable with one list per run of equal keys."""
+    # The same stateful getter serves as sort key and as grouping key, so coordinates registered while sorting
+    # resolve to the same representative values while grouping.
+    coord_getter = fuzzify(coord_getter, tolerance)
+    pairs = sorted(pairs, key=coord_getter)
+    # groupby yields (key, group) tuples; only the group is kept and materialized as a list.
+    return map(compose(list, second), groupby(pairs, coord_getter))
--- a/image_prediction/stitching/merging.py
+++ b/image_prediction/stitching/merging.py
@ -0,0 +1,174 @@
+from copy import deepcopy
+from functools import reduce
+from typing import Iterable, Callable, List
+
+from PIL import Image
+from funcy import juxt, first, rest, rcompose, rpartial
+
+from image_prediction.image_extractor.extractor import ImageMetadataPair
+from image_prediction.info import Info
+from image_prediction.stitching.grouping import CoordGrouper
+from image_prediction.stitching.split_mapper import HorizontalSplitMapper, VerticalSplitMapper
+from image_prediction.stitching.utils import make_coord_getter, flatten_groups_once, validate_box
+from image_prediction.utils.generic import until
+
+
+def no_new_merges(pairs1, pairs2):
+    """Returns True when both collections contain the same number of pairs."""
+    size1, size2 = len(pairs1), len(pairs2)
+    return size1 == size2
+
+
+def merge_along_both_axes(pairs: Iterable[ImageMetadataPair], tolerance=0) -> List[ImageMetadataPair]:
+    """Runs one merge pass along the x-axis followed by one along the y-axis and returns the result as a list."""
+    merged_along_x = merge_along_axis(pairs, "x", tolerance=tolerance)
+    merged_along_both = merge_along_axis(merged_along_x, "y", tolerance=tolerance)
+    return list(merged_along_both)
+
+
+def merge_along_axis(pairs: Iterable[ImageMetadataPair], axis, tolerance=0) -> Iterable[ImageMetadataPair]:
+    """Partially merges image-metadata pairs of adjacent images along a given axis. Needs to be iterated with
+    alternating axes until no more merges happen to merge all adjacent images.
+
+    Explanation:
+
+        Merging algorithm works as follows:
+        A dot represents a pair, a bracket a group and a colon a merged pair.
+        1) Start with pairs:   (........)
+        2) Align on lesser:    ([....] [....])
+        3) Align on greater:   ([[..] [..]] [[....]])
+        4) Flatten once:       ([..] [..] [....])
+        5) Merge orthogonally: ([:] [..] [:..])
+        6) Flatten once:       (:..:..)
+    """
+
+    # Step 3: sub-group every group by the greater coordinate on the axis orthogonal to `axis`.
+    def group_pairs_within_groups_by_greater_coordinate(groups):
+        return map(CoordGrouper(axis, tolerance=tolerance).group_pairs_by_greater_coordinate, groups)
+
+    # Step 5: merge the pairs of every group along `axis`; rpartial passes `tolerance` as trailing argument.
+    def merge_groups_along_orthogonal_axis(groups):
+        return map(rpartial(make_group_merger(axis), tolerance), groups)
+
+    # Step 2: group all pairs by the lesser coordinate on the axis orthogonal to `axis`.
+    def group_pairs_by_lesser_coordinate(pairs):
+        return CoordGrouper(axis, tolerance=tolerance).group_pairs_by_lesser_coordinate(pairs)
+
+    # rcompose applies the steps from top to bottom.
+    return rcompose(
+        group_pairs_by_lesser_coordinate,
+        group_pairs_within_groups_by_greater_coordinate,
+        flatten_groups_once,
+        merge_groups_along_orthogonal_axis,
+        flatten_groups_once,
+    )(pairs)
+
+
+def make_group_merger(axis):
+    """Returns the group-merging function for `axis` ("x" or "y"); any other value raises KeyError."""
+    return {"y": merge_group_vertically, "x": merge_group_horizontally}[axis]
+
+
+def merge_group_vertically(group: Iterable[ImageMetadataPair], tolerance=0):
+    """Merges the pairs of a group along the y-axis."""
+    return merge_group(group, "y", tolerance=tolerance)
+
+
+def merge_group_horizontally(group: Iterable[ImageMetadataPair], tolerance=0):
+    """Merges the pairs of a group along the x-axis."""
+    return merge_group(group, "x", tolerance=tolerance)
+
+
+def merge_group(group: Iterable[ImageMetadataPair], direction, tolerance=0):
+    """Merges the pairs of a group along `direction` using the merger-aggregator for that axis."""
+    reduce_group = make_merger_aggregator(direction, tolerance=tolerance)
+    # NOTE(review): `until` is defined elsewhere; presumably it re-applies `reduce_group` until `no_new_merges`
+    # holds for two consecutive results - confirm.
+    return until(no_new_merges, reduce_group, group)
+
+
+def make_merger_aggregator(axis, tolerance=0) -> Callable[[Iterable[ImageMetadataPair]], Iterable[ImageMetadataPair]]:
+    """Produces a function f : [H, T1, ... Tn] -> [HTi...Tj, Tk ... Tl] that merges adjacent image-metadata pairs on the
+    head H and aggregates non-adjacent in the tail T.
+
+    Note:
+        When tolerance > 0, the bounding box of the merged image no longer matches the bounding box of the merged
+        metadata. This is intended behaviour, but might not be expected by the caller.
+    """
+
+    def merger_aggregator(pairs: Iterable[ImageMetadataPair]):
+        def merge_on_head_and_aggregate_in_tail(pairs_aggr: Iterable[ImageMetadataPair], pair: ImageMetadataPair):
+            """Keeps the image that is being merged with as the head and aggregates non-mergables in the tail."""
+            aggr, non_aggr = juxt(first, rest)(pairs_aggr)
+            # A pair is adjacent when its c1 lies within `tolerance` of the head's c2.
+            if abs(c2_getter(aggr) - c1_getter(pair)) <= tolerance:
+                aggr = pair_merger(aggr, pair)
+                return aggr, *non_aggr
+            else:
+                return aggr, pair, *non_aggr
+
+        # Requires H to be the least element in image-concatenation direction by c1, since the concatenation happens
+        # only in c1 -> c2 direction.
+        pairs = sorted(pairs, key=c1_getter)
+        head_pair, pairs = juxt(first, rest)(pairs)
+        return list(reduce(merge_on_head_and_aggregate_in_tail, pairs, [head_pair]))
+
+    assert tolerance >= 0
+
+    # Read by the closures above; bound before the aggregator is returned.
+    c1_getter = make_coord_getter(f"{axis}1")
+    c2_getter = make_coord_getter(f"{axis}2")
+    pair_merger = make_pair_merger(axis)
+
+    return merger_aggregator
+
+
+def make_pair_merger(axis):
+    """Returns the pair-merging function for `axis` ("x" or "y"); any other value raises KeyError."""
+    return {"y": merge_pair_vertically, "x": merge_pair_horizontally}[axis]
+
+
+def merge_pair_vertically(p1: ImageMetadataPair, p2: ImageMetadataPair):
+    """Merges two pairs along the y-axis into a new pair."""
+    # The metadata is merged first because the concatenation takes the size of the new image from it.
+    metadata_merged = merge_metadata_vertically(p1.metadata, p2.metadata)
+    image_concatenated = concat_images_vertically(p1.image, p2.image, metadata_merged)
+    return ImageMetadataPair(image_concatenated, metadata_merged)
+
+
+def merge_pair_horizontally(p1: ImageMetadataPair, p2: ImageMetadataPair):
+    """Merges two pairs along the x-axis into a new pair."""
+    # The metadata is merged first because the concatenation takes the size of the new image from it.
+    metadata_merged = merge_metadata_horizontally(p1.metadata, p2.metadata)
+    image_concatenated = concat_images_horizontally(p1.image, p2.image, metadata_merged)
+    return ImageMetadataPair(image_concatenated, metadata_merged)
+
+
+def merge_metadata_vertically(m1: dict, m2: dict):
+    """Merges two metadata dictionaries along the y-axis (height, y1, y2)."""
+    m1, m2 = map(VerticalSplitMapper, [m1, m2])
+    return merge_metadata(m1, m2)
+
+
+def merge_metadata_horizontally(m1: dict, m2: dict):
+    """Merges two metadata dictionaries along the x-axis (width, x1, x2)."""
+    m1, m2 = map(HorizontalSplitMapper, [m1, m2])
+    return merge_metadata(m1, m2)
+
+
+def merge_metadata(m1: dict, m2: dict):
+    """Returns metadata spanning both inputs along the axis that their split mappers expose as c1/c2/dim.
+
+    NOTE(review): the parameters are annotated as dict, but the attributes `c1`, `c2`, `dim` and `wrapped` are
+    accessed; the callers in this module pass split mapper instances.
+    """
+
+    c1 = min(m1.c1, m2.c1)
+    c2 = max(m1.c2, m2.c2)
+    dim = abs(c2 - c1)
+
+    # All remaining fields are taken over from a copy of m1.
+    merged = deepcopy(m1)
+    merged.dim = dim
+    merged.c1 = c1
+    merged.c2 = c2
+
+    validate_box(merged.wrapped)
+
+    return merged.wrapped
+
+
+def concat_images_vertically(im1: Image, im2: Image, metadata: dict):
+    """Concatenates two images along image axis 1, i.e. the second is pasted below the first."""
+    return concat_images(im1, im2, metadata, 1)
+
+
+def concat_images_horizontally(im1: Image, im2: Image, metadata: dict):
+    """Concatenates two images along image axis 0, i.e. the second is pasted to the right of the first."""
+    return concat_images(im1, im2, metadata, 0)
+
+
+def concat_images(im1: Image, im2: Image, metadata: dict, axis):
+    """Pastes both images into a new image whose size is taken from the metadata.
+
+    `axis` indexes the image size tuple: 0 places the images side by side, 1 stacks them vertically.
+    """
+
+    # The new image takes the mode of the first image and the width and height stored in the metadata.
+    im_aggr = Image.new(im1.mode, (metadata[Info.WIDTH], metadata[Info.HEIGHT]))
+
+    images = [im1, im2]
+
+    # NOTE(review): three offsets are listed for two images; zip stops after the second one, so the last offset
+    # is never used and im2 is pasted directly after im1 - confirm that this is intended.
+    offsets = 0, im1.size[axis], im_aggr.size[axis] - im2.size[axis]
+
+    for im, offset in zip(images, offsets):
+        # The offset applies to the x component for axis 0 and to the y component otherwise.
+        box = (offset, 0) if not axis else (0, offset)
+        im_aggr.paste(im, box=box)
+
+    return im_aggr
--- a/image_prediction/stitching/split_mapper.py
+++ b/image_prediction/stitching/split_mapper.py
@ -0,0 +1,40 @@
+from copy import deepcopy
+from dataclasses import field, dataclass
+from operator import attrgetter
+
+from image_prediction.info import Info
+
+
+@dataclass
+class SplitMapper:
+    """Manages access into a mapping M by indirection through a specified access mapping to achieve a common
+    interface between various M_i.
+    """
+
+    # Maps the attribute names exposed on this object to the keys of the wrapped mapping.
+    __access_mapping: dict
+    # NOTE(review): this field shares its name with the property defined below, so assignments to it (presumably
+    # including the one in the generated __init__) go through the property setter - confirm.
+    wrapped: dict
+    # Backing storage for `wrapped`; excluded from the generated __init__.
+    __wrapped: dict = field(init=False)
+
+    def __post_init__(self):
+        # Expose every mapped entry of the wrapped mapping as an attribute named by the access mapping.
+        for k, v in self.__access_mapping.items():
+            setattr(self, k, self.__wrapped[v])
+
+    @property
+    def wrapped(self):
+        # Returns a deep copy of the wrapped mapping in which the mapped keys carry the current attribute values.
+        ret = deepcopy(self.__wrapped)
+        ret.update(dict(zip(self.__access_mapping.values(), attrgetter(*self.__access_mapping.keys())(self))))
+        return ret
+
+    @wrapped.setter
+    def wrapped(self, wrapped):
+        self.__wrapped = wrapped
+
+
+class HorizontalSplitMapper(SplitMapper):
+    """SplitMapper exposing the width as `dim` and the x-coordinates as `c1` and `c2`."""
+
+    def __init__(self, wrapped: dict):
+        super().__init__({"dim": Info.WIDTH, "c1": Info.X1, "c2": Info.X2}, wrapped)
+
+
+class VerticalSplitMapper(SplitMapper):
+    """SplitMapper exposing the height as `dim` and the y-coordinates as `c1` and `c2`."""
+
+    def __init__(self, wrapped: dict):
+        super().__init__({"dim": Info.HEIGHT, "c1": Info.Y1, "c2": Info.Y2}, wrapped)
--- a/image_prediction/stitching/stitching.py
+++ b/image_prediction/stitching/stitching.py
@ -0,0 +1,13 @@
+from typing import Iterable, List
+
+from funcy import rpartial
+
+from image_prediction.image_extractor.extractor import ImageMetadataPair
+from image_prediction.stitching.merging import merge_along_both_axes, no_new_merges
+from image_prediction.utils.generic import until
+
+
+def stitch_pairs(pairs: Iterable[ImageMetadataPair], tolerance=0) -> List[ImageMetadataPair]:
+    """Given a collection of image-metadata pairs from the same pages, combines all pairs that constitute adjacent
+    images."""
+    # rpartial fixes `tolerance` as trailing argument of each merge pass.
+    # NOTE(review): `until` is defined elsewhere; presumably it repeats the merge pass until `no_new_merges`
+    # holds for two consecutive results - confirm.
+    return until(no_new_merges, rpartial(merge_along_both_axes, tolerance), pairs)
--- a/image_prediction/stitching/utils.py
+++ b/image_prediction/stitching/utils.py
@ -0,0 +1,67 @@
+import json
+from itertools import chain
+
+from image_prediction.exceptions import InvalidBox
+from image_prediction.formatter.formatters.enum import EnumFormatter
+from image_prediction.info import Info
+
+
+def flatten_groups_once(groups):
+    """Removes one level of nesting, lazily yielding the items of each group in order."""
+    return chain.from_iterable(groups)
+
+
+def make_coord_getter(c):
+    """Returns a getter for the coordinate named `c` ("x1", "x2", "y1" or "y2"); other names raise KeyError."""
+    coord_keys = {"x1": Info.X1, "x2": Info.X2, "y1": Info.Y1, "y2": Info.Y2}
+    return make_getter(coord_keys[c])
+
+
+def make_getter(key):
+    """Returns a function that reads `key` from the `metadata` mapping of the pair it is given."""
+    return lambda pair: pair.metadata[key]
+
+
+def make_length_getter(dim):
+    """Returns a getter for the length named `dim` ("width" or "height"); other names raise KeyError."""
+    length_keys = {"width": Info.WIDTH, "height": Info.HEIGHT}
+    return make_getter(length_keys[dim])
+
+
+def validate_box(box):
+    """Runs the coordinate check followed by the size check and returns the box unchanged."""
+    validate_box_coords(box)
+    validate_box_size(box)
+    return box
+
+
+def validate_box_coords(box):
+    """Checks that the width and height of the box agree with its coordinates.
+
+    Args:
+        box: Mapping with Info.WIDTH, Info.HEIGHT, Info.X1, Info.X2, Info.Y1 and Info.Y2 entries.
+
+    Returns:
+        The unchanged box.
+
+    Raises:
+        InvalidBox: If the width differs from x2 - x1 or the height differs from y2 - y1.
+    """
+    x_diff = box[Info.WIDTH] - (box[Info.X2] - box[Info.X1])
+    y_diff = box[Info.HEIGHT] - (box[Info.Y2] - box[Info.Y1])
+
+    if x_diff:
+        raise InvalidBox(f"Width and x-coordinates differ by {x_diff} units: {format_box(box)}")
+    if y_diff:
+        # The y-extent is compared against the height, so the message names the height.
+        raise InvalidBox(f"Height and y-coordinates differ by {y_diff} units: {format_box(box)}")
+
+    return box
+
+
+def validate_box_size(box):
+    """Raises InvalidBox if the width or the height of the box is falsy (e.g. zero); returns the box otherwise."""
+
+    if not box[Info.WIDTH]:
+        raise InvalidBox(f"Zero width box: {format_box(box)}")
+
+    if not box[Info.HEIGHT]:
+        raise InvalidBox(f"Zero height box: {format_box(box)}")
+
+    return box
+
+
+def format_box(box):
+    """Renders the box as indented JSON for use in error messages."""
+    # NOTE(review): EnumFormatter is defined elsewhere; presumably it replaces the Info enum keys by
+    # JSON-serializable values - confirm.
+    return json.dumps(EnumFormatter()(box), indent=2)
--- a/image_prediction/transformer/init.py
+++ b/image_prediction/transformer/init.py
--- a/image_prediction/transformer/transformer.py
+++ b/image_prediction/transformer/transformer.py
@ -0,0 +1,20 @@
+import abc
+from typing import Iterable
+
+from funcy import curry, identity
+
+
+class Transformer(abc.ABC):
+    """Base class for callables that transform a single object or every element of a non-dict iterable."""
+
+    @abc.abstractmethod
+    def transform(self, obj):
+        """Transforms a single object; must be implemented by subclasses."""
+        raise NotImplementedError
+
+    def __call__(self, obj):
+        return self._apply(self.transform, obj)
+
+    @staticmethod
+    def _must_be_mapped_over(obj):
+        # Dictionaries count as single objects even though they are iterable.
+        return isinstance(obj, Iterable) and not isinstance(obj, dict)
+
+    def _apply(self, func, obj):
+        # Iterables (except dicts) are mapped over via the curried `map`; anything else is passed to `func`
+        # directly, since `identity(func)` is `func` itself.
+        return (curry(map) if self._must_be_mapped_over(obj) else identity)(func)(obj)
--- a/image_prediction/transformer/transformers/init.py
+++ b/image_prediction/transformer/transformers/init.py
--- a/image_prediction/transformer/transformers/coordinate/init.py
+++ b/image_prediction/transformer/transformers/coordinate/init.py
--- a/image_prediction/transformer/transformers/coordinate/coordinate_transformer.py
+++ b/image_prediction/transformer/transformers/coordinate/coordinate_transformer.py
@ -0,0 +1,22 @@
+import abc
+
+from image_prediction.transformer.transformer import Transformer
+
+
+class CoordinateTransformer(Transformer):
+    """Transformer with a forward and a backward direction for metadata coordinates."""
+
+    @abc.abstractmethod
+    def _forward(self, metadata):
+        """Forward transformation of a single metadata object; must be implemented by subclasses."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def _backward(self, metadata):
+        """Backward transformation of a single metadata object; must be implemented by subclasses."""
+        raise NotImplementedError
+
+    def forward(self, metadata):
+        """Applies `_forward` to a single metadata object or to each element of a non-dict iterable."""
+        return self._apply(self._forward, metadata)
+
+    def backward(self, metadata):
+        """Applies `_backward` to a single metadata object or to each element of a non-dict iterable."""
+        return self._apply(self._backward, metadata)
+
+    def transform(self, metadata):
+        # Calling the transformer is equivalent to the forward direction.
+        return self.forward(metadata)
--- a/image_prediction/transformer/transformers/coordinate/fitz.py
+++ b/image_prediction/transformer/transformers/coordinate/fitz.py
@ -0,0 +1,10 @@
+from image_prediction.transformer.transformers.coordinate.coordinate_transformer import CoordinateTransformer
+
+
+class FitzCoordinateTransformer(CoordinateTransformer):
+    """Coordinate transformer for Fitz metadata; both directions leave the metadata unchanged."""
+
+    def _forward(self, metadata: dict):
+        """Fitz uses top left corner as origin; we take this as the reference coordinate system."""
+        return metadata
+
+    def _backward(self, metadata: dict):
+        """Return the metadata unchanged; the forward transform is the identity, so is its inverse."""
+        # Delegates to the public `forward`, which routes through `_apply` before reaching `_forward`.
+        return self.forward(metadata)
--- a/image_prediction/transformer/transformers/coordinate/fpdf.py
+++ b/image_prediction/transformer/transformers/coordinate/fpdf.py
@ -0,0 +1,10 @@
+from image_prediction.transformer.transformers.coordinate.coordinate_transformer import CoordinateTransformer
+
+
+class FPDFCoordinateTransformer(CoordinateTransformer):
+    """Coordinate transformer for FPDF metadata; both directions leave the metadata unchanged."""
+
+    def _forward(self, metadata: dict):
+        """FPDF uses top left corner as origin; we take this as the reference coordinate system."""
+        return metadata
+
+    def _backward(self, metadata: dict):
+        """Return the metadata unchanged; the forward transform is the identity, so is its inverse."""
+        # Delegates to the public `forward`, which routes through `_apply` before reaching `_forward`.
+        return self.forward(metadata)
--- a/image_prediction/transformer/transformers/coordinate/pdfnet.py
+++ b/image_prediction/transformer/transformers/coordinate/pdfnet.py
@ -0,0 +1,18 @@
+from operator import itemgetter
+
+from funcy import omit
+
+from image_prediction.info import Info
+from image_prediction.transformer.transformers.coordinate.coordinate_transformer import CoordinateTransformer
+
+
+class PDFNetCoordinateTransformer(CoordinateTransformer):
+    """Coordinate transformer that mirrors the vertical box coordinates about the page height."""
+
+    def _forward(self, metadata: dict):
+        """PDFNet coordinate system origin is in the bottom left corner.
+
+        Returns a new dict in which `Info.Y1` and `Info.Y2` are replaced by
+        `page_height - y2` and `page_height - y1`; all other entries are copied unchanged.
+        """
+        y1, y2, page_height = itemgetter(Info.Y1, Info.Y2, Info.PAGE_HEIGHT)(metadata)
+        # The old y2 determines the new y1 and vice versa, so y1 <= y2 still holds after the flip.
+        y1_t = page_height - y2
+        y2_t = page_height - y1
+        return {**omit(metadata, [Info.Y1, Info.Y2]), **{Info.Y1: y1_t, Info.Y2: y2_t}}
+
+    def _backward(self, metadata: dict):
+        """Undo the flip; mirroring about the page height is its own inverse, so `forward` is reused."""
+        return self.forward(metadata)
--- a/image_prediction/transformer/transformers/response.py
+++ b/image_prediction/transformer/transformers/response.py
@ -1,18 +1,20 @@
-"""Defines functions for constructing service responses."""
-
-
 import math
-from itertools import starmap
 from operator import itemgetter

 from image_prediction.config import CONFIG
+from image_prediction.transformer.transformer import Transformer
+from image_prediction.utils import get_logger
+
+logger = get_logger()


-def build_response(predictions: list, metadata: list) -> list:
-    return list(starmap(build_image_info, zip(predictions, metadata)))
+class ResponseTransformer(Transformer):
+    """Transformer that turns one combined metadata/classification dict into a service response."""
+
+    def transform(self, data):
+        """Log the call at debug level and delegate to `build_image_info`."""
+        logger.debug("ResponseTransformer.transform")
+        return build_image_info(data)


-def build_image_info(prediction: dict, metadata: dict) -> dict:
+def build_image_info(data: dict) -> dict:
    def compute_geometric_quotient():
        page_area_sqrt = math.sqrt(abs(page_width * page_height))
        image_area_sqrt = math.sqrt(abs(x2 - x1) * abs(y2 - y1))
@ -20,9 +22,9 @@ def build_image_info(prediction: dict, metadata: dict) -> dict:

    page_width, page_height, x1, x2, y1, y2, width, height = itemgetter(
        "page_width", "page_height", "x1", "x2", "y1", "y2", "width", "height"
-    )(metadata)
+    )(data)

-    quotient = compute_geometric_quotient()
+    quotient = round(compute_geometric_quotient(), 4)

    min_image_to_page_quotient_breached = bool(quotient < CONFIG.filters.image_to_page_quotient.min)
    max_image_to_page_quotient_breached = bool(quotient > CONFIG.filters.image_to_page_quotient.max)
@ -33,13 +35,13 @@ def build_image_info(prediction: dict, metadata: dict) -> dict:
        width / height > CONFIG.filters.image_width_to_height_quotient.max
    )

-    min_confidence_breached = bool(max(prediction["probabilities"].values()) < CONFIG.filters.min_confidence)
-    prediction["label"] = prediction.pop("class")  # "class" as field name causes problem for Java objectmapper
-    prediction["probabilities"] = {klass: round(prob, 6) for klass, prob in prediction["probabilities"].items()}
+    classification = data["classification"]
+
+    min_confidence_breached = bool(max(classification["probabilities"].values()) < CONFIG.filters.min_confidence)

    image_info = {
-        "classification": prediction,
-        "position": {"x1": x1, "x2": x2, "y1": y1, "y2": y2, "pageNumber": metadata["page_idx"] + 1},
+        "classification": classification,
+        "position": {"x1": x1, "x2": x2, "y1": y1, "y2": y2, "pageNumber": data["page_idx"] + 1},
        "geometry": {"width": width, "height": height},
        "filters": {
            "geometry": {
@ -49,7 +51,7 @@ def build_image_info(prediction: dict, metadata: dict) -> dict:
                    "tooSmall": min_image_to_page_quotient_breached,
                },
                "imageFormat": {
-                    "quotient": width / height,
+                    "quotient": round(width / height, 4),
                    "tooTall": min_image_width_to_height_quotient_breached,
                    "tooWide": max_image_width_to_height_quotient_breached,
                },
--- a/image_prediction/utils.py
+++ b/image_prediction/utils.py
@ -1,68 +1,3 @@
-import logging
-import tempfile
-from contextlib import contextmanager
-
-from image_prediction.config import CONFIG


-@contextmanager
-def temporary_pdf_file(pdf: bytes):
-    with tempfile.NamedTemporaryFile() as f:
-        f.write(pdf)
-        yield f.name

-
-def make_logger_getter():
-
-    logger = logging.getLogger("imclf")
-    logger.propagate = False
-
-    handler = logging.StreamHandler()
-    handler.setLevel(CONFIG.service.logging_level)
-
-    log_format = "[%(levelname)s]: %(message)s"
-    formatter = logging.Formatter(log_format)
-
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-
-    def get_logger():
-        return logger
-
-    return get_logger
-
-
-get_logger = make_logger_getter()
-
-
-def show_banner():
-    banner = '''
-    .....     .                            ...                ..          
-  .d88888Neu. 'L                        xH88"`~ .x8X    x .d88"    oec :  
-  F""""*8888888F    ..    .     :     :8888   .f"8888Hf  5888R    @88888  
- *      `"*88*"   .888: x888  x888.  :8888>  X8L  ^""`   '888R    8"*88%  
-  -....    ue=:. ~`8888~'888X`?888f` X8888  X888h         888R    8b.     
-         :88N  `   X888  888X '888>  88888  !88888.       888R   u888888> 
-         9888L     X888  888X '888>  88888   %88888       888R    8888R   
-  uzu.   `8888L    X888  888X '888>  88888 '> `8888>      888R    8888P   
-,""888i   ?8888    X888  888X '888>  `8888L %  ?888   !   888R    *888>   
-4  9888L   %888>  "*88%""*88" '888!`  `8888  `-*""   /   .888B .  4888    
-'  '8888   '88%     `~    "    `"`      "888.      :"    ^*888%   '888    
-     "*8Nu.z*"                            `""***~"`        "%      88R    
-                                                                   88>    
-                                                                   48     
-                                                                   '8     
-    '''
-
-    logger = logging.getLogger(__name__)
-    logger.propagate = False
-
-    handler = logging.StreamHandler()
-    handler.setLevel(logging.INFO)
-
-    formatter = logging.Formatter("")
-
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-
-    logger.info(banner)
--- a/image_prediction/utils/init.py
+++ b/image_prediction/utils/init.py
@ -0,0 +1 @@
+from .logger import get_logger
--- a/image_prediction/utils/banner.py
+++ b/image_prediction/utils/banner.py
@ -0,0 +1,21 @@
+import logging
+
+from image_prediction.locations import BANNER_FILE
+
+
+def show_banner():
+    """Log the contents of `BANNER_FILE` at INFO level without any log prefix.
+
+    A dedicated, non-propagating logger with its own stream handler is used, so the banner
+    is emitted unformatted and independently of the root logger configuration.
+    """
+    with open(BANNER_FILE) as f:
+        banner = "\n" + f.read() + "\n"
+
+    logger = logging.getLogger(__name__)
+    logger.propagate = False
+    # Without an explicit level the logger inherits the root logger's level (WARNING by default),
+    # which would drop the INFO record before it ever reaches the handler.
+    logger.setLevel(logging.INFO)
+
+    # Attach the handler only once; otherwise every further call would stack another handler
+    # and print the banner one additional time per call.
+    if not logger.handlers:
+        handler = logging.StreamHandler()
+        handler.setLevel(logging.INFO)
+        handler.setFormatter(logging.Formatter(""))
+        logger.addHandler(handler)
+
+    logger.info(banner)
--- a/image_prediction/utils/generic.py
+++ b/image_prediction/utils/generic.py
@ -0,0 +1,7 @@
+from funcy import iterate, chunks
+
+
+def until(cond, func, *args, **kwargs):
+    """Iterate `func` and return the first element `a` of a pair `(a, b)` with truthy `cond(a, b)`.
+
+    The pairs are drawn from `chunks(2, iterate(func, *args, **kwargs))`.
+    """
+    # NOTE(review): `chunks(2, ...)` presumably yields non-overlapping pairs (x0, x1), (x2, x3), ...;
+    # if every consecutive pair (x0, x1), (x1, x2), ... is meant to be checked, `funcy.pairwise`
+    # would be needed instead -- confirm the intent.
+    for a, b in chunks(2, iterate(func, *args, **kwargs)):
+        if cond(a, b):
+            return a
--- a/image_prediction/utils/logger.py
+++ b/image_prediction/utils/logger.py
@ -0,0 +1,29 @@
+import logging
+
+from image_prediction.config import CONFIG
+
+logging.basicConfig()
+
+
+def make_logger_getter():
+    """Configure the "imclf" logger once and return a zero-argument getter for it.
+
+    The logger does not propagate to the root logger and writes through its own stream
+    handler; logger and handler both use `CONFIG.service.logging_level`.
+    """
+    logger = logging.getLogger("imclf")
+    logger.propagate = False
+
+    handler = logging.StreamHandler()
+    handler.setLevel(CONFIG.service.logging_level)
+
+    # Timestamp, left-aligned level name padded to 8 characters, then the message.
+    log_format = "%(asctime)s %(levelname)-8s %(message)s"
+    formatter = logging.Formatter(log_format, datefmt="%Y-%m-%d %H:%M:%S")
+
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+    logger.setLevel(CONFIG.service.logging_level)
+
+    def get_logger():
+        """Return the logger configured by the enclosing function (always the same object)."""
+        return logger
+
+    return get_logger
+
+
+get_logger = make_logger_getter()
--- a/image_prediction/utils/pdf_annotation.py
+++ b/image_prediction/utils/pdf_annotation.py
@ -0,0 +1,99 @@
+"""Defines utilities for PDF processing."""
+
+import json
+from operator import itemgetter
+
+from PDFNetPython3.PDFNetPython import (
+    PDFDoc,
+    PDFNet,
+    Square,
+    Rect,
+    ColorPt,
+    BorderStyle,
+    SDFDoc,
+    Point,
+    Text,
+)
+
+from image_prediction.utils import get_logger
+
+logger = get_logger()
+
+
+def annotate_image(doc, image_info):
+    """Add a dashed box and a text note for one image to the page given in `image_info`.
+
+    The annotation color depends on the filter results: green if all filters passed, red if
+    the classification is unconfident, blue otherwise. The note contains `image_info` as JSON.
+    """
+
+    def draw_box():
+        """Append a dashed square annotation spanning the image coordinates to the page."""
+        sq = Square.Create(doc.GetSDFDoc(), Rect(*coords))
+        sq.SetColor(ColorPt(*color), 3)
+        sq.SetBorderStyle(BorderStyle(BorderStyle.e_dashed, 2, 0, 0, [4, 2]))
+        sq.SetPadding(4)
+        sq.RefreshAppearance()
+        page.AnnotPushBack(sq)
+
+    def add_note():
+        """Append a text annotation holding the JSON dump of `image_info` at (x1, y1)."""
+        txt = Text.Create(doc.GetSDFDoc(), Point(*coords[:2]))
+        txt.SetContents(json.dumps(image_info, indent=2, ensure_ascii=False))
+        txt.SetColor(ColorPt(*color))
+        page.AnnotPushBack(txt)
+        txt.RefreshAppearance()
+
+    red = (1, 0, 0)
+    green = (0, 1, 0)
+    blue = (0, 0, 1)
+
+    if image_info["filters"]["allPassed"]:
+        color = green
+    elif image_info["filters"]["probability"]["unconfident"]:
+        color = red
+    else:
+        color = blue
+
+    # `page`, `coords` and `color` are read by the nested helpers through their closure.
+    page = doc.GetPage(image_info["position"]["pageNumber"])
+    coords = itemgetter("x1", "y1", "x2", "y2")(image_info["position"])
+
+    draw_box()
+    add_note()
+
+
+def init():
+    """Initialize PDFNet with the license key embedded below."""
+    # NOTE(review): the license key is committed in plain text; consider loading it from
+    # configuration or an environment variable instead of keeping it in the source.
+    PDFNet.Initialize(
+        "Knecon AG(en.knecon.swiss):OEM:DDA-R::WL+:AMS(20211029):BECC974307DAB4F34B513BC9B2531B24496F6FCB83CD8AC574358A959730B622FABEF5C7"
+    )
+
+
+def draw_metadata_box(pdf_path, metadata, store_path):
+    """Draw a red dashed box for `metadata` on the first page of a PDF and save the result.
+
+    Args:
+        pdf_path: Path of the PDF to annotate.
+        metadata: Mapping providing the box coordinates under "x1", "y1", "x2" and "y2".
+        store_path: Path the annotated PDF is written to.
+    """
+
+    init()
+
+    doc = PDFDoc(pdf_path)
+
+    color = (1, 0, 0)
+
+    # Diagnostic output goes through the module logger instead of a bare print to stdout.
+    logger.debug("Drawing metadata box: %s", metadata)
+
+    coords = itemgetter("x1", "y1", "x2", "y2")(metadata)
+    # The box is always placed on the first page, whatever page the metadata refers to.
+    page = doc.GetPage(1)
+
+    sq = Square.Create(doc.GetSDFDoc(), Rect(*coords))
+    sq.SetColor(ColorPt(*color), 3)
+    sq.SetBorderStyle(BorderStyle(BorderStyle.e_dashed, 2, 0, 0, [4, 2]))
+    sq.SetPadding(4)
+    sq.RefreshAppearance()
+    page.AnnotPushBack(sq)
+
+    doc.Save(store_path, SDFDoc.e_linearized)
+
+    logger.info(f"Saved annotated PDF to {store_path}")
+
+
+def annotate_pdf(pdf_path, responses, store_path):
+    """Annotate every image described in `responses` on a PDF and save the result.
+
+    Args:
+        pdf_path: Path of the PDF to annotate.
+        responses: Iterable of image info dicts, each passed to `annotate_image`.
+        store_path: Path the annotated PDF is written to.
+    """
+
+    init()
+
+    doc = PDFDoc(pdf_path)
+
+    for image_info in responses:
+        annotate_image(doc, image_info)
+
+    doc.Save(store_path, SDFDoc.e_linearized)
+
+    logger.info(f"Saved annotated PDF to {store_path}")
--- a/pytest.ini
+++ b/pytest.ini
@ -1,2 +1,5 @@
 [pytest]
 norecursedirs = incl
+filterwarnings =
+    ignore:.*:DeprecationWarning
+    ignore:.*:DeprecationWarning
--- a/requirements.txt
+++ b/requirements.txt
@ -1,23 +1,22 @@
-Flask==2.0.2
+Flask==2.1.1
 requests==2.27.1
 iteration-utilities==0.11.0
-dvc==2.9.3
+dvc==2.10.0
 dvc[ssh]
-frozendict==2.3.0
-waitress==2.0.0
-envyaml~=1.8.210417
+waitress==2.1.1
+envyaml==1.10.211231
 dependency-check==0.6.*
-envyaml~=1.8.210417
-mlflow~=1.20.2
-numpy~=1.19.3
-PDFNetPython3~=9.1.0
-tqdm~=4.62.2
-pandas~=1.3.1
-mlflow~=1.20.2
-tensorflow~=2.5.0
-PDFNetPython3~=9.1.0
-Pillow~=8.3.2
-PyYAML~=5.4.1
-scikit_learn~=0.24.2
-
+mlflow==1.24.0
+numpy==1.22.3
+tqdm==4.64.0
+pandas==1.4.2
+tensorflow==2.8.0
+PyYAML==6.0
 pytest~=7.1.0
+funcy==1.17
+PyMuPDF==1.19.6
+fpdf==1.7.2
+coverage==6.3.2
+Pillow==9.1.0
+PDFNetPython3==9.1.0
+pdf2image==1.16.0
--- a/scripts/keras_MnWE.py
+++ b/scripts/keras_MnWE.py
@ -40,7 +40,7 @@ def make_predict_fn():
    model = make_model()

    def predict(*args):
-        # model = make_model()
+        # service_estimator = make_model()
        return model.predict(np.random.random(size=(1, 784)))

    return predict
--- a/scripts/run_pipeline.py
+++ b/scripts/run_pipeline.py
@ -0,0 +1,55 @@
+import argparse
+import json
+import os
+from glob import glob
+
+from image_prediction.pipeline import load_pipeline
+from image_prediction.utils import get_logger
+from image_prediction.utils.pdf_annotation import annotate_pdf
+
+logger = get_logger()
+
+
+def parse_args():
+    """Parse the command line: an input path, an optional print flag and an optional page interval."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("input", help="pdf file or directory")
+    parser.add_argument("--print", "-p", help="print output to terminal", action="store_true", default=False)
+    # Two integers i and j; `main` turns them into `range(i, j)`.
+    parser.add_argument("--page_interval", "-i", help="page interval [i, j), min index = 0", nargs=2, type=int)
+
+    args = parser.parse_args()
+
+    return args
+
+
+def process_pdf(pipeline, pdf_path, page_range=None):
+    """Run `pipeline` on one PDF and store an annotated copy under /tmp.
+
+    Args:
+        pipeline: Callable taking the PDF bytes and a `page_range` keyword argument and
+            yielding the predictions.
+        pdf_path: Path of the PDF to process.
+        page_range: Optional page range forwarded to the pipeline.
+
+    Returns:
+        The list of predictions produced by the pipeline.
+    """
+    logger.info(f"Processing {pdf_path}")
+    # Read the bytes first so the file is not kept open while the pipeline runs.
+    with open(pdf_path, "rb") as f:
+        pdf_bytes = f.read()
+
+    predictions = list(pipeline(pdf_bytes, page_range=page_range))
+
+    # Build the output name from the file stem; a plain `.replace(".pdf", ...)` would rewrite
+    # every ".pdf" occurrence in the path and leave names such as "x.PDF" unchanged.
+    stem, _ = os.path.splitext(os.path.basename(pdf_path))
+    annotate_pdf(pdf_path, predictions, os.path.join("/tmp", f"{stem}_annotated.pdf"))
+
+    return predictions
+
+
+def main(args):
+    """Process the PDF file, or every "*.pdf" directly inside the directory, given by `args.input`.
+
+    Predictions are printed as JSON per file when `args.print` is set.
+    """
+    pipeline = load_pipeline(verbose=False, tolerance=3)
+
+    if os.path.isfile(args.input):
+        pdf_paths = [args.input]
+    else:
+        # Anything that is not a file is treated as a directory; the glob is not recursive.
+        pdf_paths = glob(os.path.join(args.input, "*.pdf"))
+    page_range = range(*args.page_interval) if args.page_interval else None
+
+    for pdf_path in pdf_paths:
+        predictions = process_pdf(pipeline, pdf_path, page_range=page_range)
+        if args.print:
+            print(pdf_path)
+            print(json.dumps(predictions, indent=2))
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
--- a/src/serve.py
+++ b/src/serve.py
@ -4,45 +4,29 @@ from waitress import serve

 from image_prediction.config import CONFIG
 from image_prediction.flask import make_prediction_server
-from image_prediction.predictor import Predictor
-from image_prediction.response import build_response
-from image_prediction.utils import get_logger, show_banner
+from image_prediction.pipeline import load_pipeline
+from image_prediction.utils import get_logger
+from image_prediction.utils.banner import show_banner

 logger = get_logger()


 def main():
    def predict(pdf):
-        # Keras model.predict stalls when model was loaded in different process
+        # Keras model.predict stalls when the model was loaded in a different process;
+        # therefore, we re-load the model (part of the pipeline) every time we process a new document.
        # https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python
-        predictor = Predictor()
-        predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar)
-        response = build_response(predictions, metadata)
-        return response
-
-    logger.info("Predictor ready.")
+        logger.debug("Loading pipeline...")
+        pipeline = load_pipeline(verbose=CONFIG.service.verbose)
+        logger.debug("Running pipeline...")
+        return list(pipeline(pdf))

    prediction_server = make_prediction_server(predict)
-
-    run_prediction_server(prediction_server, mode=CONFIG.webserver.mode)
-
-
-def run_prediction_server(app, mode="development"):
-    if mode == "development":
-        app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True)
-    elif mode == "production":
-        serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port)
+    serve(prediction_server, host=CONFIG.webserver.host, port=CONFIG.webserver.port, _quiet=False)


 if __name__ == "__main__":
-    logging_level = CONFIG.service.logging_level
-    logging.basicConfig(level=logging_level)
-    logging.getLogger("flask").setLevel(logging.ERROR)
-    logging.getLogger("urllib3").setLevel(logging.ERROR)
-    logging.getLogger("werkzeug").setLevel(logging.ERROR)
-    logging.getLogger("waitress").setLevel(logging.ERROR)
-    logging.getLogger("PIL").setLevel(logging.ERROR)
-    logging.getLogger("h5py").setLevel(logging.ERROR)
+    logging.basicConfig(level=CONFIG.service.logging_level)

    show_banner()

--- a/test/conftest.py
+++ b/test/conftest.py
@ -1,70 +1,515 @@
-import os.path
+import json
+import logging
+import os
+import random
+import string
+import tempfile
+from functools import partial
+from itertools import starmap
+from operator import itemgetter

+import fpdf
+import numpy as np
 import pytest
+from PIL import Image
+from funcy import rcompose, merge

-from image_prediction.predictor import Predictor
+from image_prediction.classifier.classifier import Classifier
+from image_prediction.classifier.image_classifier import ImageClassifier
+from image_prediction.estimator.adapter.adapter import EstimatorAdapter
+from image_prediction.estimator.preprocessor.preprocessors.basic import BasicPreprocessor
+from image_prediction.exceptions import (
+    UnknownEstimatorAdapter,
+    UnknownImageExtractor,
+    UnknownDatabaseType,
+    UnknownLabelFormat,
+)
+from image_prediction.image_extractor.extractor import ImageMetadataPair
+from image_prediction.image_extractor.extractors.mock import ImageExtractorMock
+from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
+from image_prediction.info import Info
+from image_prediction.label_mapper.mappers.numeric import IndexMapper
+from image_prediction.label_mapper.mappers.probability import ProbabilityMapper, ProbabilityMapperKeys
+from image_prediction.locations import TEST_DATA_DIR
+from image_prediction.model_loader.database.connectors.mock import DatabaseConnectorMock
+from image_prediction.model_loader.loader import ModelLoader
+from image_prediction.model_loader.loaders.mlflow import MlflowConnector
+from image_prediction.pipeline import load_pipeline
+from image_prediction.redai_adapter.mlflow import MlflowModelReader
+from image_prediction.redai_adapter.model import PredictionModelHandle
+from image_prediction.utils import get_logger
+
+
+@pytest.fixture(autouse=True)
+def mute_logger():
+    """Silence the project logger for the duration of every test and restore its level afterwards."""
+    logger = get_logger()
+    level = logger.level
+    # A level above CRITICAL suppresses every record of the standard severities.
+    logger.setLevel(logging.CRITICAL + 1)
+    yield
+    logger.setLevel(level)


@pytest.fixture
-def predictions():
+def image_extractor(extractor_type):
+    """Return the image extractor selected by `extractor_type` ("mock", "parsable_pdf" or "default").
+
+    "default" yields None; any other value raises `UnknownImageExtractor`.
+    `extractor_type` is presumably supplied by the requesting test's parametrization -- confirm.
+    """
+    if extractor_type == "mock":
+        return ImageExtractorMock()
+    elif extractor_type == "parsable_pdf":
+        return ParsablePDFImageExtractor()
+    elif extractor_type == "default":
+        return None
+    else:
+        raise UnknownImageExtractor(f"No image extractor for type {extractor_type} was specified.")
+
+
+@pytest.fixture
+def image_classifier(classifier, monkeypatch, batch_of_expected_string_labels):
+    """Return an `ImageClassifier` wrapping the `classifier` fixture with a `BasicPreprocessor`."""
+    # NOTE(review): `monkeypatch` and `batch_of_expected_string_labels` are requested but unused here.
+    return ImageClassifier(classifier, preprocessor=BasicPreprocessor())
+
+
+@pytest.fixture
+def classifier(estimator_adapter, label_mapper):
+    """Return a `Classifier` built from the estimator adapter and label mapper fixtures."""
+    classifier = Classifier(estimator_adapter, label_mapper)
+    return classifier
+
+
+@pytest.fixture
+def estimator_mock():
+    """Return a stand-in estimator whose predictions are a list of None, one per batch item."""
+
+    class EstimatorMock:
+        @staticmethod
+        def predict(batch):
+            return [None for _ in batch]
+
+        @staticmethod
+        def predict_proba(batch):
+            return [None for _ in batch]
+
+        def __call__(self, batch):
+            # Calling the mock is equivalent to calling `predict`.
+            return self.predict(batch)
+
+    return EstimatorMock()
+
+
+@pytest.fixture
+def label_mapper(label_format, classes):
+    """Return the label mapper for `label_format`: `IndexMapper` or `ProbabilityMapper`.
+
+    Raises `UnknownLabelFormat` for any other format.
+    """
+    if label_format == "index":
+        return IndexMapper(classes)
+    elif label_format == "probability":
+        return ProbabilityMapper(classes)
+    else:
+        raise UnknownLabelFormat(f"No label mapper for label format {label_format} was specified.")
+
+
+@pytest.fixture(params=["index"])
+def label_format(request):
+    """Label format under test; defaults to "index" only."""
+    return request.param
+
+
+@pytest.fixture
+def expected_predictions_mapped(
+    label_format, batch_of_expected_string_labels, batch_of_expected_label_to_probability_mappings
+):
+    """Return the expected label-mapped predictions matching `label_format`.
+
+    String labels for "index", label-to-probability mappings for "probability";
+    raises `UnknownLabelFormat` otherwise.
+    """
+    if label_format == "index":
+        return batch_of_expected_string_labels
+    elif label_format == "probability":
+        return batch_of_expected_label_to_probability_mappings
+    else:
+        raise UnknownLabelFormat(f"No label mapper for label format {label_format} was specified.")
+
+
+@pytest.fixture
+def expected_predictions(label_format, batch_of_expected_numeric_labels, batch_of_expected_probability_arrays):
+    """Return the expected raw (unmapped) predictions matching `label_format`.
+
+    Numeric labels for "index", probability arrays for "probability";
+    raises `UnknownLabelFormat` otherwise.
+    """
+    if label_format == "index":
+        return batch_of_expected_numeric_labels
+    elif label_format == "probability":
+        return batch_of_expected_probability_arrays
+    else:
+        raise UnknownLabelFormat(f"No label mapper for label format {label_format} was specified.")
+
+
+@pytest.fixture
+def estimator_adapter(
+    estimator_type, estimator_mock, keras_model, model_handle_mock, output_batch_generator, monkeypatch
+):
+    """Return an `EstimatorAdapter` for `estimator_type` whose `predict` yields the expected outputs.
+
+    Supported types are "mock", "keras" and "redai"; anything else raises
+    `UnknownEstimatorAdapter`. All three underlying estimator fixtures are requested
+    regardless of the selected type.
+    """
+    if estimator_type == "mock":
+        estimator_adapter = EstimatorAdapter(estimator_mock)
+    elif estimator_type == "keras":
+        estimator_adapter = EstimatorAdapter(keras_model)
+    elif estimator_type == "redai":
+        estimator_adapter = EstimatorAdapter(PredictionModelHandle(model_handle_mock))
+    else:
+        raise UnknownEstimatorAdapter(f"No adapter for estimator type {estimator_type} was specified.")
+
+    def mock_predict(batch):
+        # Run real predict function to test for mechanical issues, but return externally defined
+        # predictions to test the callers of the estimator adapter against the expected predictions
+        return [next(output_batch_generator) for _ in _predict(batch)]
+
+    # Keep a reference to the real method before it is replaced; `mock_predict` reads it lazily.
+    _predict = estimator_adapter.predict
+    monkeypatch.setattr(estimator_adapter, "predict", mock_predict)
+
+    return estimator_adapter
+
+
+@pytest.fixture
+def keras_model(input_size):
+    """Return a small compiled Keras model (Conv2D followed by two Dense layers) for `input_size`."""
+    # NOTE(review): `os` is already imported at module level; this local import is redundant.
+    import os
+
+    # Set before importing tensorflow (presumably to silence its native log output -- confirm).
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+    import tensorflow as tf
+
+    tf.keras.backend.set_image_data_format("channels_last")
+
+    inputs = tf.keras.Input(shape=input_size)
+    conv = tf.keras.layers.Conv2D(3, 3)
+    dense = tf.keras.layers.Dense(10)
+
+    outputs = tf.keras.layers.Dense(10)(dense(conv(inputs)))
+    model = tf.keras.Model(inputs=inputs, outputs=outputs)
+    model.compile()
+
+    return model
+
+
+@pytest.fixture
+def images(input_batch):
+    """Return the input batch converted to a list of PIL images."""
+    return list(map(array_to_image, input_batch))
+
+
+@pytest.fixture
+def input_batch(batch_size, input_size):
+    """Return a random array of shape `(batch_size, *input_size)` with values in [0, 1)."""
+    return np.random.random_sample(size=(batch_size, *input_size))
+
+
+@pytest.fixture(params=[0, 1, 2, 16, 32])
+def batch_size(request):
+    """Batch size under test, including the empty batch."""
+    return request.param
+
+
+@pytest.fixture
+def input_size(alpha, __input_size):
+    """Return `(width, height, depth)` with the depth increased by one if `alpha` is true."""
+    w, h, d = __input_size
+    # `alpha` is a bool, so adding it yields d or d + 1.
+    return w, h, d + alpha
+
+
+@pytest.fixture(params=[False])
+def alpha(request):
+    """Whether the generated inputs carry an additional alpha channel; defaults to False only."""
+    return request.param
+
+
+@pytest.fixture(params=[{"width": 10, "height": 15, "depth": 3}, {"width": 150, "height": 100, "depth": 3}])
+def __input_size(request):
+    """Base input size under test as a `(width, height, depth)` tuple, before the alpha adjustment."""
+    return itemgetter("width", "height", "depth")(request.param)
+
+
+def array_to_image(array):
+    """Convert an array with values in [0, 1] and 3 or 4 channels into an RGB or RGBA PIL image.
+
+    Raises `ValueError` for any other size of the last axis.
+    """
+    assert np.all(array <= 1)
+    assert np.all(array >= 0)
+
+    if array.shape[-1] == 3:
+        mode = "RGB"
+    elif array.shape[-1] == 4:
+        mode = "RGBA"
+    else:
+        raise ValueError(f"Unexpected number of channels {array.shape[-1]}. Expected 3 or 4.")
+
+    # Scale to the 0..255 range and cast to 8-bit before handing the data to PIL.
+    # noinspection PyTypeChecker
+    return Image.fromarray(np.uint8(array * 255), mode=mode)
+
+
+@pytest.fixture
+def batch_of_expected_string_labels(batch_of_expected_numeric_labels, classes):
+    """Return the class names corresponding to the expected numeric labels."""
+    return map_labels(batch_of_expected_numeric_labels, classes)
+
+
+@pytest.fixture
+def batch_of_expected_numeric_labels(batch_size, classes):
+    """Return `batch_size` random class indices, drawn with replacement."""
+    return random.choices(range(len(classes)), k=batch_size)
+
+
+@pytest.fixture
+def batch_of_expected_label_to_probability_mappings(batch_of_expected_probability_arrays, classes):
+    """Return, per probability array, the most likely label plus the label-to-probability mapping.
+
+    Probabilities are rounded to four decimals and ordered from most to least likely.
+    """
+
+    def map_probabilities(probabilities):
+        lbl2prob = dict(sorted(zip(classes, map(rounder, probabilities)), key=itemgetter(1), reverse=True))
+        # The dict is ordered by descending probability, so its first key is the most likely label.
+        most_likely = [*lbl2prob][0]
+        return {ProbabilityMapperKeys.LABEL: most_likely, ProbabilityMapperKeys.PROBABILITIES: lbl2prob}
+
+    # Round to four decimals, then convert the numpy scalar to a plain float.
+    rounder = rcompose(partial(np.round, decimals=4), float)
+    return list(map(map_probabilities, batch_of_expected_probability_arrays))
+
+
+@pytest.fixture
+def batch_of_expected_probability_arrays(batch_size, classes):
+    """Return `batch_size` arrays of uniform random values, one value per class (not normalized)."""
+    return [np.random.uniform(size=len(classes)) for _ in range(batch_size)]
+
+
+@pytest.fixture
+def output_batch_generator(expected_predictions):
+    """Return an iterator over the expected predictions, consumed by the patched `predict`."""
+    return iter(expected_predictions)
+
+
+@pytest.fixture
+def classes():
+    """Class names used by the label mappers and the expected-label fixtures."""
+    return ["A", "B", "C"]
+
+
+def map_labels(numeric_labels, classes):
+    """Return the class name at each index given in `numeric_labels`."""
+    return [classes[nl] for nl in numeric_labels]
+
+
+@pytest.fixture
+def metadata_plus_mapped_prediction(expected_predictions_mapped, metadata):
+    """Return each metadata dict extended by its mapped prediction under the "classification" key."""
+    return [{"classification": epm, **mdt} for epm, mdt in zip(expected_predictions_mapped, metadata)]
+
+
+@pytest.fixture
+def metadata_formatted_plus_mapped_prediction_formatted(expected_predictions_mapped_and_formatted, metadata_formatted):
    return [
-        {
-            "class": "signature",
-            "probabilities": {
-                "signature": 1.0,
-                "logo": 9.150285377746546e-19,
-                "other": 4.374506412383356e-19,
-                "formula": 3.582569597002796e-24,
-            },
-        }
+        {"classification": epm, **mdt}
+        for epm, mdt in zip(expected_predictions_mapped_and_formatted, metadata_formatted)
    ]


@pytest.fixture
-def metadata():
-    return [
-        {
-            "page_height": 612.0,
-            "page_width": 792.0,
-            "height": 61.049999999999955,
-            "width": 139.35000000000002,
-            "page_idx": 8,
-            "x1": 63.5,
-            "x2": 202.85000000000002,
-            "y1": 472.0,
-            "y2": 533.05,
+def expected_predictions_mapped_and_formatted(expected_predictions_mapped):
+    """Return the mapped predictions with every key replaced by its `.value`."""
+    # NOTE(review): assumes every mapped prediction is a dict with enum keys (the "probability"
+    # label format); plain string labels have no `.items()` -- confirm against the callers.
+    return [{k.value: v for k, v in epm.items()} for epm in expected_predictions_mapped]
+
+
+@pytest.fixture
+def metadata(images, info_label_map):
+    """Return one randomly positioned metadata dict per image, keyed by `info_label_map` members.
+
+    Every image is placed fully inside a 595 x 842 page; page indices never decrease from
+    one image to the next.
+    """
+    page_idx = 0
+
+    def current_page_idx():
+        # Advance by a random step of 0..3 pages, capped at the index of the last image.
+        nonlocal page_idx
+        page_idx += random.randint(0, 3)
+        return min(page_idx, len(images) - 1)
+
+    def build_image_metadata(image):
+        width, height = image.size
+        page_width = 595
+        page_height = 842
+        # Choose the top left corner so that the whole image stays within the page.
+        x1 = random.randint(0, page_width - width)
+        x2 = x1 + width
+        y1 = random.randint(0, page_height - height)
+        y2 = y1 + height
+        metadata = {
+            info_label_map.PAGE_WIDTH: page_width,
+            info_label_map.PAGE_HEIGHT: page_height,
+            info_label_map.PAGE_IDX: current_page_idx(),
+            info_label_map.WIDTH: width,
+            info_label_map.HEIGHT: height,
+            info_label_map.X1: x1,
+            info_label_map.X2: x2,
+            info_label_map.Y1: y1,
+            info_label_map.Y2: y2,
+            info_label_map.ALPHA: image.mode == "RGBA",
         }
-    ]
+        return metadata
+
+    return list(map(build_image_metadata, images))


@pytest.fixture
-def response():
-    return [
-        {
-            "classification": {
-                "label": "signature",
-                "probabilities": {"formula": 0.0, "logo": 0.0, "other": 0.0, "signature": 1.0},
-            },
-            "filters": {
-                "allPassed": True,
-                "geometry": {
-                    "imageFormat": {"quotient": 2.282555282555285, "tooTall": False, "tooWide": False},
-                    "imageSize": {"quotient": 0.13248234868245012, "tooLarge": False, "tooSmall": False},
-                },
-                "probability": {"unconfident": False},
-            },
-            "geometry": {"height": 61.049999999999955, "width": 139.35000000000002},
-            "position": {"pageNumber": 9, "x1": 63.5, "x2": 202.85000000000002, "y1": 472.0, "y2": 533.05},
-        }
-    ]
+def info_label_map():
+    """Return the `Info` class whose members serve as metadata keys."""
+    return Info


@pytest.fixture
-def predictor():
-    return Predictor()
+def metadata_formatted(metadata):
+    """Return the metadata dicts with every key replaced by its `.value`."""
+
+    def format_metadata(metadata):
+        return {key.value: val for key, val in metadata.items()}
+
+    return list(map(format_metadata, metadata))


@pytest.fixture
-def test_pdf():
-    with open("./test/test_data/f2dc689ca794fccb8cd38b95f2bf6ba9.pdf", "rb") as f:
-        return f.read()
+def image_metadata_pairs(images, metadata):
+    """Return one `ImageMetadataPair` per image together with its metadata."""
+    return list(starmap(ImageMetadataPair, zip(images, metadata)))
+
+
+@pytest.fixture
+def pdf(image_metadata_pairs):
+    """Return the bytes of an FPDF document (unit: points) containing all images of the pairs."""
+    pdf = fpdf.FPDF(unit="pt")
+
+    for pair in image_metadata_pairs:
+        add_image(pdf, pair)
+
+    return pdf_stream(pdf)
+
+
+def add_image(pdf, image_metadata_pair, suffix="png"):
+    """Add pages until the page index of the pair's metadata exists, then place the image on the last page."""
+    while fewer_pages_then_required(image_metadata_pair.metadata[Info.PAGE_IDX], pdf):
+        pdf.add_page()
+
+    add_image_to_last_page(pdf, image_metadata_pair, suffix=suffix)
+
+
+def fewer_pages_then_required(page_idx, pdf):
+    """Return True while the zero-based `page_idx` lies beyond `pdf.page - 1`."""
+    # NOTE(review): assumes `pdf.page` is the number of the current (last added) page -- confirm.
+    return page_idx > pdf.page - 1
+
+
+def pdf_stream(pdf: fpdf.fpdf.FPDF):
+    """Return the document produced by `pdf.output(dest="S")`, encoded as latin-1 bytes."""
+    return pdf.output(dest="S").encode("latin1")
+
+
+def add_image_to_last_page(pdf: fpdf.fpdf.FPDF, image_metadata_pair, suffix):
+    """Place the image of the pair on the PDF at the position and size given by its metadata."""
+    image, metadata = image_metadata_pair
+    x, y, w, h = itemgetter(Info.X1, Info.Y1, Info.WIDTH, Info.HEIGHT)(metadata)
+
+    # The image is handed to FPDF by file name, so it is written to a temporary file first.
+    with tempfile.NamedTemporaryFile(suffix=f".{suffix}") as temp_image:
+        image.save(temp_image.name)
+        pdf.image(temp_image.name, x=x, y=y, w=w, h=h, type=suffix)
+
+
+@pytest.fixture
+def model():
+    """Return a dummy model whose `predict` and `predict_proba` always return True."""
+
+    class Model:
+        @staticmethod
+        def predict(*args):
+            return True
+
+        @staticmethod
+        def predict_proba(*args):
+            return True
+
+    return Model()
+
+
+@pytest.fixture
+def model_database_record_identifier():
+    """Return a random identifier made of ten distinct ASCII letters."""
+    return "".join(random.sample(string.ascii_letters, k=10))
+
+
+@pytest.fixture
+def model_database_record(model, classes):
+    """Return a database record holding the dummy model and its classes."""
+    return {"model": model, "classes": classes}
+
+
+@pytest.fixture
+def model_database(model_database_record, model_database_record_identifier):
+    """Return an in-memory database mapping the record identifier to the record."""
+    return {model_database_record_identifier: model_database_record}
+
+
+@pytest.fixture
+def database_connector(database_type, model_database, mlflow_reader):
+    """Return the connector for `database_type`: "mock" or "mlflow".
+
+    Raises `UnknownDatabaseType` for any other value.
+    """
+    if database_type == "mock":
+        return DatabaseConnectorMock(model_database)
+
+    elif database_type == "mlflow":
+        return MlflowConnector(mlflow_reader)
+
+    else:
+        raise UnknownDatabaseType(f"No connector for database type {database_type} was specified.")
+
+
+@pytest.fixture
+def model_loader(database_connector):
+    """Return a `ModelLoader` reading through the database connector fixture."""
+    return ModelLoader(database_connector)
+
+
+@pytest.fixture
+def mlflow_run_id():
+    """Return the run id configured under `CONFIG.service.run_id`."""
+    from image_prediction.config import CONFIG
+
+    return CONFIG.service.run_id
+
+
+@pytest.fixture
+def mlruns_dir():
+    """Return the project's `MLRUNS_DIR` location."""
+    from image_prediction.locations import MLRUNS_DIR
+
+    return MLRUNS_DIR
+
+
+@pytest.fixture
+def mlflow_reader(mlruns_dir):
+    """Return an `MlflowModelReader` for the mlruns directory."""
+    return MlflowModelReader(mlruns_dir)
+
+
+@pytest.fixture
+def model_handle_mock(estimator_mock):
+    """Return a stand-in model handle whose methods return a list of None, one per batch item."""
+
+    class ModelHandleMock:
+        def __init__(self):
+            # The wrapped estimator is the mock estimator fixture.
+            self.model = estimator_mock
+
+        def prep_images(self, batch):
+            return [None for _ in batch]
+
+        def predict(self, batch):
+            return [None for _ in batch]
+
+        def predict_proba(self, batch):
+            return [None for _ in batch]
+
+    return ModelHandleMock()
+
+
+@pytest.fixture
+def real_pdf():
+    """Yield the raw bytes of the sample PDF stored in `TEST_DATA_DIR`."""
+    with open(os.path.join(TEST_DATA_DIR, "f2dc689ca794fccb8cd38b95f2bf6ba9.pdf"), "rb") as f:
+        yield f.read()
+
+
+@pytest.fixture
+def real_expected_service_response():
+    """Yield the expected predictions for the sample PDF, parsed from its JSON file."""
+    with open(os.path.join(TEST_DATA_DIR, "f2dc689ca794fccb8cd38b95f2bf6ba9_predictions.json"), "r") as f:
+        yield json.load(f)
+
+
+@pytest.fixture
+def pipeline():
+    """Return the pipeline loaded via `load_pipeline` with verbosity switched off."""
+    pipeline = load_pipeline(verbose=False)
+    return pipeline
+
+
+def transform_equal(a, b):
+    """Compare `a` with `b`, materializing `a` into a list first if it is a `map` object."""
+    return (list(a) if isinstance(a, map) else a) == b
+
+
+def get_base_position_metadata(width, height, page_width, page_height):
+    """Return metadata holding the image and page dimensions for page index 0, without coordinates."""
+    return {
+        Info.WIDTH: width,
+        Info.HEIGHT: height,
+        Info.PAGE_IDX: 0,
+        Info.PAGE_WIDTH: page_width,
+        Info.PAGE_HEIGHT: page_height,
+    }
+
+
+@pytest.fixture
+def base_patch_metadata(width, height, page_width, page_height):
+    """Return base metadata for a patch whose box starts at (0, 0) and ends at (width, height)."""
+    metadata = get_base_position_metadata(width, height, page_width, page_height)
+    metadata = merge(metadata, {Info.X1: 0, Info.Y1: 0, Info.X2: width, Info.Y2: height})
+    return metadata
+
+
+@pytest.fixture(params=[33, 100])
+def height(request):
+    """Patch height under test."""
+    return request.param
+
+
+@pytest.fixture(params=[10, 31])
+def width(request):
+    """Patch width under test."""
+    return request.param
+
+
+@pytest.fixture(params=[220, 30])
+def page_height(request):
+    """Page height under test; the value 30 is smaller than both patch heights."""
+    return request.param
+
+
+@pytest.fixture(params=[100, 310])
+def page_width(request):
+    """Page width under test."""
+    return request.param
+
+
+def random_single_color_image_from_metadata(metadata):
+    """Return an RGB image of the metadata's width and height, filled with one random color."""
+    image = Image.new(
+        "RGB", (metadata[Info.WIDTH], metadata[Info.HEIGHT]), color=tuple(map(int, np.random.uniform(size=3) * 255))
+    )
+    return image
+
+
+def gray_image_from_metadata(metadata):
+    """Return an RGB image of the metadata's width and height, filled with the gray (100, 100, 100)."""
+    image = Image.new("RGB", (metadata[Info.WIDTH], metadata[Info.HEIGHT]), color=(100, 100, 100))
+    return image
--- a/test/test_data/f2dc689ca794fccb8cd38b95f2bf6ba9.pdf
+++ b/test/test_data/f2dc689ca794fccb8cd38b95f2bf6ba9.pdf
--- a/test/data/f2dc689ca794fccb8cd38b95f2bf6ba9_predictions.json
+++ b/test/data/f2dc689ca794fccb8cd38b95f2bf6ba9_predictions.json
@ -0,0 +1,42 @@
+[
+  {
+    "classification": {
+      "label": "formula",
+      "probabilities": {
+        "formula": 1.0,
+        "logo": 0.0,
+        "other": 0.0,
+        "signature": 0.0
+      }
+    },
+    "position": {
+      "x1": 321,
+      "x2": 515,
+      "y1": 348,
+      "y2": 542,
+      "pageNumber": 2
+    },
+    "geometry": {
+      "width": 194,
+      "height": 194
+    },
+    "filters": {
+      "geometry": {
+        "imageSize": {
+          "quotient": 0.2741,
+          "tooLarge": false,
+          "tooSmall": false
+        },
+        "imageFormat": {
+          "quotient": 1.0,
+          "tooTall": false,
+          "tooWide": false
+        }
+      },
+      "probability": {
+        "unconfident": false
+      },
+      "allPassed": true
+    }
+  }
+]
--- a/test/data/stitching_with_tolerance.json
+++ b/test/data/stitching_with_tolerance.json
@ -0,0 +1,92 @@
+{
+    "input": [
+        {
+            "width": 100,
+            "height": 8,
+            "page_idx": 0,
+            "page_width": 100,
+            "page_height": 100,
+            "x1": 0,
+            "y1": 0,
+            "x2": 100,
+            "y2": 8
+        },
+        {
+            "width": 100,
+            "height": 9,
+            "page_idx": 0,
+            "page_width": 100,
+            "page_height": 100,
+            "x1": 0,
+            "y1": 9,
+            "x2": 100,
+            "y2": 18
+        },
+        {
+            "width": 100,
+            "height": 35,
+            "page_idx": 0,
+            "page_width": 100,
+            "page_height": 100,
+            "x1": 0,
+            "y1": 18,
+            "x2": 100,
+            "y2": 53
+        },
+        {
+            "width": 47,
+            "height": 46,
+            "page_idx": 0,
+            "page_width": 100,
+            "page_height": 100,
+            "x1": 0,
+            "y1": 54,
+            "x2": 47,
+            "y2": 100
+        },
+        {
+            "width": 31,
+            "height": 46,
+            "page_idx": 0,
+            "page_width": 100,
+            "page_height": 100,
+            "x1": 48,
+            "y1": 54,
+            "x2": 79,
+            "y2": 100
+        },
+        {
+            "width": 20,
+            "height": 19,
+            "page_idx": 0,
+            "page_width": 100,
+            "page_height": 100,
+            "x1": 80,
+            "y1": 54,
+            "x2": 100,
+            "y2": 73
+        },
+        {
+            "width": 20,
+            "height": 27,
+            "page_idx": 0,
+            "page_width": 100,
+            "page_height": 100,
+            "x1": 80,
+            "y1": 73,
+            "x2": 100,
+            "y2": 100
+        }
+    ],
+    "target": {
+        "width": 100,
+        "height": 100,
+        "page_idx": 0,
+        "page_width": 100,
+        "page_height": 100,
+        "x1": 0,
+        "y1": 0,
+        "x2": 100,
+        "y2": 100
+    }
+}
--- a/test/exploration_tests/funcy_test.py
+++ b/test/exploration_tests/funcy_test.py
@ -0,0 +1,32 @@
+import pytest
+from funcy import rcompose, chunks
+
+
+def test_rcompose():
+    f = rcompose(lambda x: x ** 2, str, lambda x: x * 2)
+    assert f(3) == "99"
+
+
+def test_chunk_iterable_exact_split():
+    a, b = chunks(5, iter(range(10)))
+    assert a == list(range(5))
+    assert b == list(range(5, 10))
+
+
+def test_chunk_iterable_no_split():
+    a = next(chunks(10, iter(range(10))))
+    assert a == list(range(10))
+
+
+def test_chunk_iterable_last_partial():
+    a, b, c, d = chunks(3, iter(range(10)))
+    assert d == [9]
+
+
+def test_chunk_iterable_empty():
+    """Chunking an empty iterator yields nothing, so `next` raises StopIteration."""
+    with pytest.raises(StopIteration):
+        next(chunks(3, iter(range(0))))
+
+
+def test_chunk_iterable_less_than_chunk_size_elements():
+    assert next(chunks(5, iter(range(2)))) == [0, 1]
--- a/test/integration_tests/actual_server_test.py
+++ b/test/integration_tests/actual_server_test.py
@ -0,0 +1,102 @@
+import socket
+from multiprocessing import Process
+
+import pytest
+import requests
+from funcy import retry
+from waitress import serve
+
+from image_prediction.flask import make_prediction_server
+from image_prediction.pipeline import load_pipeline
+
+
+@pytest.fixture
+def host():
+    """Host address used by the server fixtures: the IPv4 loopback address."""
+    return "127.0.0.1"
+
+
+def get_free_port(host):
+    sock = socket.socket()
+    sock.bind((host, 0))
+    return sock.getsockname()[1]
+
+
+@pytest.fixture
+def port(host):
+    """Port number returned by `get_free_port` for `host`."""
+    return get_free_port(host)
+
+
+@pytest.fixture
+def url(host, port):
+    """Base HTTP URL built from `host` and `port`, without a trailing slash."""
+    return f"http://{host}:{port}"
+
+
+@pytest.fixture(params=["dummy", "actual"])
+def server_type(request):
+    """Parametrized server kind ("dummy", then "actual") consumed by the `server` fixture."""
+    return request.param
+
+
+@pytest.fixture
+def server(server_type):
+    if server_type == "dummy":
+        return make_prediction_server(lambda x: int(x.decode()) // 2)
+
+    elif server_type == "actual":
+        return make_prediction_server(lambda x: list(load_pipeline(verbose=False)(x)))
+
+    else:
+        raise ValueError(f"Unknown server type {server_type}.")
+
+
+@pytest.fixture
+def host_and_port(host, port, server):
+    """Dictionary with the `host` and `port` entries for the server to listen on.
+
+    NOTE(review): `server` is requested but not used in the body - presumably
+    only to enforce fixture ordering; confirm or drop.
+    """
+    return {"host": host, "port": port}
+
+
+@retry(tries=5, timeout=1)
+def server_ready(url):
+    response = requests.get(f"{url}/ready")
+    response.raise_for_status()
+    return response.status_code == 200
+
+
+@pytest.fixture(autouse=True, scope="function")
+def server_process(server, host_and_port, url):
+    def get_server_process():
+        return Process(target=serve, kwargs={"app": server, **host_and_port})
+
+    server = get_server_process()
+    server.start()
+
+    if server_ready(url):
+        yield
+
+    server.kill()
+    server.join()
+    server.close()
+
+
+@pytest.mark.parametrize("server_type", ["actual"])
+def test_server_predict(url, real_pdf, real_expected_service_response):
+    response = requests.post(f"{url}/predict", data=real_pdf)
+    response.raise_for_status()
+    assert response.json() == real_expected_service_response
+
+
+@pytest.mark.parametrize("server_type", ["dummy"])
+def test_server_dummy_operation(url):
+    response = requests.post(f"{url}/predict", data=b"42")
+    response.raise_for_status()
+    assert response.json() == 21
+
+
+@pytest.mark.parametrize("server_type", ["dummy"])
+def test_server_health_check(url):
+    response = requests.get(f"{url}/health")
+    response.raise_for_status()
+    assert response.status_code == 200
+
+
+@pytest.mark.parametrize("server_type", ["dummy"])
+def test_server_ready_check(url):
+    """The readiness probe reports ready for the running "dummy" server."""
+    assert server_ready(url)
--- a/test/unit_tests/box_validation_test.py
+++ b/test/unit_tests/box_validation_test.py
@ -0,0 +1,29 @@
+import pytest
+
+from image_prediction.exceptions import InvalidBox
+from image_prediction.info import Info
+from image_prediction.stitching.utils import validate_box_size, validate_box_coords
+
+
+def test_validate_fail_too_short():
+    box = {Info.WIDTH: 1, Info.HEIGHT: 0}
+    with pytest.raises(InvalidBox):
+        validate_box_size(box)
+
+
+def test_validate_fail_too_thin():
+    box = {Info.WIDTH: 0, Info.HEIGHT: 1}
+    with pytest.raises(InvalidBox):
+        validate_box_size(box)
+
+
+def test_validate_fail_xs_width_mismatch():
+    box = {Info.WIDTH: 2, Info.HEIGHT: 4, Info.X1: 0, Info.Y1: 0, Info.X2: 1, Info.Y2: 4}
+    with pytest.raises(InvalidBox):
+        validate_box_coords(box)
+
+
+def test_validate_fail_ys_height_mismatch():
+    box = {Info.WIDTH: 2, Info.HEIGHT: 3, Info.X1: 0, Info.Y1: 0, Info.X2: 2, Info.Y2: 4}
+    with pytest.raises(InvalidBox):
+        validate_box_coords(box)
--- a/test/unit_tests/classifier_test.py
+++ b/test/unit_tests/classifier_test.py
@ -0,0 +1,19 @@
+import pytest
+
+
+@pytest.mark.parametrize("estimator_type", ["mock", "keras", "redai"])
+@pytest.mark.parametrize("label_format", ["index", "probability"])
+def test_classifier(classifier, input_batch, expected_predictions_mapped):
+    predictions = classifier(input_batch)
+    assert predictions == expected_predictions_mapped
+
+
+def test_batch_format(input_batch):
+    def channels_are_last(input_batch):
+        return input_batch.shape[-1] == 3
+
+    def is_fourth_order_tensor(input_batch):
+        return input_batch.ndim == 4
+
+    assert channels_are_last(input_batch)
+    assert is_fourth_order_tensor(input_batch)
--- a/test/unit_tests/compositor_test.py
+++ b/test/unit_tests/compositor_test.py
@ -0,0 +1,32 @@
+import pytest
+
+from image_prediction.compositor.compositor import TransformerCompositor
+from image_prediction.formatter.formatters.camel_case import Snake2CamelCaseKeyFormatter
+from image_prediction.formatter.formatters.enum import EnumFormatter
+from image_prediction.formatter.formatters.identity import IdentityFormatter
+from test.conftest import transform_equal
+
+
+def test_identity(metadata):
+    compositor = TransformerCompositor(IdentityFormatter())
+    assert transform_equal(compositor(metadata), metadata)
+
+
+def test_composition(metadata, metadata_formatted):
+    compositor = TransformerCompositor(IdentityFormatter(), EnumFormatter())
+    assert transform_equal(compositor(metadata), metadata_formatted)
+
+
+@pytest.fixture()
+def compositor_test_enum_metadata(info_label_map):
+    return [{info_label_map.WIDTH: 100, info_label_map.PAGE_WIDTH: 200}]
+
+
+@pytest.fixture()
+def compositor_test_camel_case_metadata(info_label_map):
+    return [{"width": 100, "pageWidth": 200}]
+
+
+def test_enum_to_camel_case(compositor_test_enum_metadata, compositor_test_camel_case_metadata):
+    compositor = TransformerCompositor(EnumFormatter(), Snake2CamelCaseKeyFormatter())
+    assert transform_equal(compositor(compositor_test_enum_metadata), compositor_test_camel_case_metadata)
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Matthias Bisping	03e7b00cfd	refactoring	2022-04-14 12:20:05 +02:00
Matthias Bisping	7aee00cb49	alpha channel querying improved	2022-04-13 17:31:33 +02:00
Matthias Bisping	2cc52c4630	renaming	2022-04-13 13:36:45 +02:00
Matthias Bisping	daa1da3a50	fix name	2022-04-13 13:17:23 +02:00
Matthias Bisping	6a7debde14	added exploration tests	2022-04-13 13:15:05 +02:00
Matthias Bisping	b4f279c549	test for until	2022-04-13 13:12:19 +02:00
Matthias Bisping	f5881f2229	formatting	2022-04-13 13:06:20 +02:00
Matthias Bisping	62bfedfea8	alpha channel test fix	2022-04-13 12:06:55 +02:00
Matthias Bisping	1d88876ab1	alpha channel info WIP	2022-04-12 18:44:04 +02:00
Matthias Bisping	bbafad5561	refactoring in preparationfor alpha channel info	2022-04-12 18:22:38 +02:00
Matthias Bisping	f17a232009	tests for box validation	2022-04-12 16:54:40 +02:00
Matthias Bisping	88a46ae7cd	adjustet expected output for actual pipeline test for change from pixel to pdf units (x2 - x1 etc.) for width and height fields of metadata records	2022-04-12 16:42:31 +02:00
Matthias Bisping	e82a81f5c8	refactoring	2022-04-12 16:34:00 +02:00
Matthias Bisping	35c5b15e32	tolerance forwarding through pipeline constructor; box validation; tiny box filtering	2022-04-12 16:29:20 +02:00
Matthias Bisping	698e647c6f	applied black	2022-04-12 15:06:18 +02:00
Matthias Bisping	d8f86d14a5	fuzzy stitching completed	2022-04-12 15:04:32 +02:00
Matthias Bisping	bb7c1be630	fuzzy stitching WIP: mostly works, but sometimes fails. run test_image_stitcher_with_gaps to debug	2022-04-11 19:20:47 +02:00
Matthias Bisping	79cd31850d	fuzzy stitching WIP: added tolerance to stitching; added fuzzification function; added tests for grouping and (fuzzy and exact)	2022-04-11 16:47:47 +02:00
Matthias Bisping	3d335783dc	topological sorting of definitions by caller hierarchy	2022-04-11 16:08:54 +02:00
Matthias Bisping	bb79f9dd55	applied black	2022-04-11 13:57:32 +02:00
Matthias Bisping	585cdf5c70	integrated stitching into parsable pdf extractor	2022-04-11 13:57:10 +02:00
Matthias Bisping	04cf0245ed	formatting	2022-04-11 13:38:09 +02:00
Matthias Bisping	3530ef72c5	docstring update	2022-04-11 13:37:46 +02:00
Matthias Bisping	d80af336eb	refactoring	2022-04-11 13:28:39 +02:00
Matthias Bisping	bcf6dc5c47	generalized split mapper	2022-04-11 13:03:02 +02:00
Matthias Bisping	f4c0547405	refactoring: replaced split mapper with dataclass	2022-04-11 12:16:42 +02:00
Matthias Bisping	1bea5fb9a8	refactoring	2022-04-11 10:29:13 +02:00
Matthias Bisping	57440f5106	refactoring	2022-04-11 09:53:32 +02:00
Matthias Bisping	710783a2f8	merging algorithm explanation adjusted	2022-04-11 09:28:00 +02:00
Matthias Bisping	887b8339a2	renaming	2022-04-08 14:17:05 +02:00
Matthias Bisping	43cb0fffed	refactoring	2022-04-08 14:13:03 +02:00
Matthias Bisping	6e7645e319	topological sorting of definitions by caller hierarchy	2022-04-08 14:04:48 +02:00
Matthias Bisping	3b18fc6158	refactoring	2022-04-08 13:56:57 +02:00
Matthias Bisping	1b10445f91	refactoring	2022-04-08 12:01:20 +02:00
Matthias Bisping	5967149c49	refactoring	2022-04-07 21:49:55 +02:00
Matthias Bisping	303970db51	refactoring	2022-04-07 21:44:04 +02:00
Matthias Bisping	51793d19e9	refactoring	2022-04-07 21:39:01 +02:00
Matthias Bisping	e276a5ec27	refactoring	2022-04-07 21:20:55 +02:00
Matthias Bisping	7e6fe7cf11	refactoring	2022-04-07 21:12:57 +02:00
Matthias Bisping	bb5db1b4ef	refactoring	2022-04-07 20:47:58 +02:00
Matthias Bisping	8ac9fcb19f	stitcher test passes	2022-04-07 19:40:26 +02:00
Matthias Bisping	160973e2be	refactoring	2022-04-07 19:05:13 +02:00
Matthias Bisping	803cc57155	refactoring	2022-04-07 18:48:12 +02:00
Matthias Bisping	50b4d239cb	group merging done	2022-04-07 18:05:15 +02:00
Matthias Bisping	9bb07f95fb	refactoring	2022-04-07 17:51:53 +02:00
Matthias Bisping	29028cc1a5	refactoring	2022-04-07 17:44:54 +02:00
Matthias Bisping	2fcb0bd149	refactoring	2022-04-07 17:28:25 +02:00
Matthias Bisping	3e882dc247	group merging wip	2022-04-07 17:18:09 +02:00
Matthias Bisping	2b1e7cbb08	added img-mdat-pair merging logic	2022-04-07 16:11:12 +02:00
Matthias Bisping	5e8b55ef10	added image concatenation; refactoring	2022-04-07 11:42:38 +02:00
Matthias Bisping	3266e0af58	refactoring; added metadata merging logic	2022-04-06 15:55:35 +02:00
Matthias Bisping	7e2696d5c5	stitching impl wip	2022-04-05 23:39:17 +02:00
Matthias Bisping	302613bf2b	refactoring eager eval because double iter later	2022-04-05 23:08:41 +02:00
Matthias Bisping	66fd103d1b	refactoring	2022-04-05 22:56:08 +02:00
Matthias Bisping	6e5d6912ed	refactoring	2022-04-05 22:53:26 +02:00
Matthias Bisping	b1efb5ed09	refactoring	2022-04-05 19:40:13 +02:00
Matthias Bisping	ef70e11352	refactoring	2022-04-05 19:38:29 +02:00
Matthias Bisping	315679468b	applied black	2022-04-05 19:35:36 +02:00
Matthias Bisping	64e3350dee	refactoring	2022-04-05 19:35:13 +02:00
Matthias Bisping	6a7e0e1000	refactoring	2022-04-05 19:33:22 +02:00
Matthias Bisping	11fc63035d	refactoring	2022-04-05 19:03:31 +02:00
Matthias Bisping	4bc295b212	refactoring	2022-04-05 18:57:08 +02:00
Matthias Bisping	4c46be4abc	test param adjustment	2022-04-05 18:09:43 +02:00
Matthias Bisping	37ee086b5d	applied black	2022-04-05 17:55:38 +02:00
Matthias Bisping	1fd30e68b6	test data generation for image stitching	2022-04-05 17:54:43 +02:00
Matthias Bisping	2c908162f1	refactoring	2022-04-05 16:31:57 +02:00
Matthias Bisping	4756b8c9bd	refactoring	2022-04-05 13:03:22 +02:00
Matthias Bisping	e0885c545a	added page range paramter to extractor	2022-04-05 13:03:17 +02:00
Matthias Bisping	fdb7ebe618	logging change	2022-04-04 23:37:49 +02:00
Matthias Bisping	ce69f7d160	removed obsolete imports	2022-04-04 21:50:10 +02:00
Matthias Bisping	8f61c4cba2	doc.extract_image(xref) can yield None; hence added filtering for None images	2022-04-04 21:49:45 +02:00
Matthias Bisping	f3e2b2335f	updated dependency versions	2022-04-04 19:35:49 +02:00
Matthias Bisping	9cda65ad41	removed obsolete code	2022-04-04 18:30:43 +02:00
Matthias Bisping	692e72b3b2	refactoring	2022-04-04 18:29:17 +02:00
Matthias Bisping	38869d52c6	refactoring	2022-04-04 18:17:49 +02:00
Matthias Bisping	e01b5c9acd	refactoring	2022-04-04 15:50:09 +02:00
Matthias Bisping	6a6fc19958	refactoring	2022-04-04 15:48:15 +02:00
Matthias Bisping	1b1f1aafef	refactoring	2022-04-04 14:19:06 +02:00
Matthias Bisping	caef37376b	renaming	2022-04-04 14:04:36 +02:00
Matthias Bisping	16aa951c96	refactoring	2022-04-04 14:01:19 +02:00
Matthias Bisping	89afb8f920	added cooridate transformation testing by images	2022-04-04 13:55:48 +02:00
Matthias Bisping	1ffc9dcc68	refactoring	2022-04-04 13:12:08 +02:00
Matthias Bisping	0976971117	refactoring	2022-04-04 10:23:22 +02:00
Matthias Bisping	b4b0058475	added additional corners coordinates for coordinate transformation tests	2022-04-04 10:18:23 +02:00
Matthias Bisping	2ee36dcb54	applied black	2022-04-03 04:48:11 +02:00
Matthias Bisping	ab382646b7	applied black	2022-04-03 04:47:49 +02:00
Matthias Bisping	8c916a79c3	updated gitignore	2022-04-03 04:47:36 +02:00
Matthias Bisping	3ff6dac2e0	added explanations for how the coordinate transformations were inferred	2022-04-03 04:46:52 +02:00
Matthias Bisping	d134884553	misc	2022-04-03 04:35:44 +02:00
Matthias Bisping	2d0545c928	refactoring	2022-04-03 04:31:50 +02:00
Matthias Bisping	65a4a8e34e	refactoring	2022-04-03 04:25:10 +02:00
Matthias Bisping	39c111fd42	integrated PDFNet coordinate transformer into pipeline	2022-04-03 04:08:00 +02:00
Matthias Bisping	0376223c9d	coordinate transformers refac	2022-04-03 04:00:15 +02:00
Matthias Bisping	bf85ef357c	coordinate transformers version 1 completed	2022-04-03 03:51:31 +02:00
Matthias Bisping	f6a7a14a20	pdfnet coordinate transformer wip	2022-04-03 03:19:46 +02:00
Matthias Bisping	41f783dc5d	coordinate transformer refac	2022-04-03 02:21:30 +02:00
Matthias Bisping	32397256c8	coordinate transformer wip	2022-04-03 02:20:03 +02:00
Matthias Bisping	f44e6f4fd7	coordinate transformer, added Fitz transformer	2022-04-03 02:15:41 +02:00
Matthias Bisping	3d2c97bc10	coordinate transformer wip	2022-04-03 01:58:51 +02:00
Matthias Bisping	9663cec12d	coordinate transformer wip	2022-04-03 01:54:51 +02:00
Matthias Bisping	c1c3f541d4	coordinate transformer wip	2022-04-03 01:45:01 +02:00
Matthias Bisping	4d86e78307	muting logger in tests	2022-04-02 19:31:08 +02:00
Matthias Bisping	1cf6ab256c	muting logger in tests	2022-04-02 18:34:13 +02:00
Matthias Bisping	a89e374c67	removed obsolete code	2022-04-02 03:41:55 +02:00
Matthias Bisping	0861e22542	fixed pipeline not working with flask... model was loaded in external process, probably; known issue	2022-04-02 03:38:44 +02:00
Matthias Bisping	7827869af4	fixed logger's logging level	2022-04-02 02:58:30 +02:00
Matthias Bisping	613bba8cfc	...	2022-04-02 02:45:21 +02:00
Matthias Bisping	5c23898280	added log messages to all pipelien components; converting pipelien output to list for REST transport; refactoring; added e2e test (flask + pipeline)... but hangs	2022-04-02 02:44:30 +02:00
Matthias Bisping	e8d0299e46	refactoring	2022-04-02 01:27:30 +02:00
Matthias Bisping	cb00aed62c	refactoring	2022-04-02 01:23:57 +02:00
Matthias Bisping	1501653673	coverage increased for flask tests	2022-04-02 00:16:01 +02:00
Matthias Bisping	b4b929b65f	added mocked server tests with flask testing uitilities	2022-04-01 21:55:59 +02:00
Matthias Bisping	3d1c251e10	removed redundant TF env var export	2022-04-01 21:35:10 +02:00
Matthias Bisping	c80549d5d3	refactoring: model wrapper to base class and derived class for efficient net	2022-04-01 21:32:18 +02:00
Matthias Bisping	070749880e	removed obsolete code	2022-04-01 21:13:15 +02:00
Matthias Bisping	94783c54f2	eliminated redai dependency; updated requirement versions	2022-04-01 21:10:41 +02:00
Matthias Bisping	2b48c6108b	added coverage.process_startup for multiprocessing coverage... but does not quite work yet	2022-04-01 19:51:33 +02:00
Matthias Bisping	da9b3d0cb9	applied black	2022-04-01 19:50:44 +02:00
Matthias Bisping	c372529ee5	dynamic waiting for server to be ready in tests	2022-04-01 19:04:41 +02:00
Matthias Bisping	1a1ece1f95	adjusted call of server running function	2022-04-01 12:22:24 +02:00
Matthias Bisping	426061e5ea	applied black	2022-04-01 12:20:32 +02:00
Matthias Bisping	7c2cf44ad0	refactoring	2022-04-01 00:21:57 +02:00
Matthias Bisping	c125e1ff6c	web server refactoring + tests	2022-03-31 23:43:14 +02:00
Matthias Bisping	dd007891c7	changed banner	2022-03-31 19:50:12 +02:00
Matthias Bisping	d3257fdeda	refactoring	2022-03-31 19:39:08 +02:00
Matthias Bisping	1581880ec6	added updated version of serve.py	2022-03-31 19:38:35 +02:00
Matthias Bisping	268b83a1ff	refactoring	2022-03-31 19:17:48 +02:00
Matthias Bisping	5caa9807e2	added response formatter and pipeline test	2022-03-31 19:01:32 +02:00
Matthias Bisping	82added50a	empty implementation of abstract base class method	2022-03-31 17:29:05 +02:00
Matthias Bisping	b6ccfbcf8f	removed obsolete import	2022-03-31 17:25:42 +02:00
Matthias Bisping	e17912caa9	derived enum formatter from key formatter	2022-03-31 17:22:54 +02:00
Matthias Bisping	3eaf9dc0e1	refactoring: introduced key mapper base class and proba mapper key enum	2022-03-31 16:55:58 +02:00
Matthias Bisping	0cefef4e15	more test cases for key transformer	2022-03-31 16:35:12 +02:00
Matthias Bisping	4f94cbd68d	refactoring	2022-03-31 16:26:40 +02:00
Matthias Bisping	2517b45d44	fixed bug in camel case transformer	2022-03-31 15:55:15 +02:00
Matthias Bisping	2a62ad7aba	typo	2022-03-31 15:48:52 +02:00
Matthias Bisping	20c980dbe6	fixed bug in camel case transformer	2022-03-31 15:47:45 +02:00
Matthias Bisping	726298b155	made formatter a transformer derivation	2022-03-31 15:26:30 +02:00
Matthias Bisping	479afbcd34	formatting	2022-03-31 15:20:41 +02:00
Matthias Bisping	4ab9f0d89b	corrected camel case converter	2022-03-31 15:18:59 +02:00
Matthias Bisping	d4604a2cb5	renaming	2022-03-31 14:52:37 +02:00
Matthias Bisping	4ebb36247e	refactoring	2022-03-31 14:49:46 +02:00
Matthias Bisping	7ec7390e90	refactoring	2022-03-31 12:52:35 +02:00
Matthias Bisping	dc1cdde458	refactoring; added compositor for formatters	2022-03-31 12:52:15 +02:00
Matthias Bisping	0921ef9a4f	removed obsolete import	2022-03-31 11:12:59 +02:00
Matthias Bisping	91dd467142	applied black	2022-03-30 19:38:15 +02:00
Matthias Bisping	b3e1604ecc	added floating point conversion to label mapper for json serializability	2022-03-30 19:36:45 +02:00
Matthias Bisping	20718996bd	refactoring; testing of prediction model handel redai adapter	2022-03-30 19:01:54 +02:00
Matthias Bisping	cc8d87338c	removed obsolete code	2022-03-30 18:17:35 +02:00
Matthias Bisping	258c1ab02d	testing laberl mappers for raising of excpetions when encountering unexpected input formats	2022-03-30 18:15:45 +02:00
Matthias Bisping	ce3d33955e	removing unused code / refactoring for coverage maximization	2022-03-30 18:03:21 +02:00
Matthias Bisping	a95cc4e06b	added config tests	2022-03-30 17:55:49 +02:00
Matthias Bisping	6d1ace473b	removed obsolete code	2022-03-30 16:35:47 +02:00
Matthias Bisping	0a22a35912	refactoring; renaming	2022-03-30 16:35:26 +02:00
Matthias Bisping	a5d3232dd0	testing index and probability label format in classifier prediction test	2022-03-30 16:34:17 +02:00
Matthias Bisping	49f9847d9a	removed obsolete code	2022-03-30 16:07:45 +02:00
Matthias Bisping	1c6f5749dd	updated classifier test for label mappers	2022-03-30 16:04:13 +02:00
Matthias Bisping	8bccec277f	added array label mapper	2022-03-30 15:54:18 +02:00
Matthias Bisping	7f37f841dd	renaming	2022-03-30 15:32:21 +02:00
Matthias Bisping	8c7e3e29f5	added label mapper	2022-03-30 14:17:58 +02:00
Matthias Bisping	99d8e921db	renaming	2022-03-30 13:57:29 +02:00
Matthias Bisping	6835394d30	added formatter test; refactored batch_size fixture	2022-03-30 13:43:13 +02:00
Matthias Bisping	ad6bb80900	fixed sorting predictions by probabilities in wrong order	2022-03-30 01:14:03 +02:00
Matthias Bisping	95209a5c9d	typo	2022-03-30 01:06:06 +02:00
Matthias Bisping	45a07c620a	fixed chaining bug that lead to greedy evaluation	2022-03-30 00:53:34 +02:00
Matthias Bisping	81ab9a5f53	tuning prediction format handling	2022-03-30 00:13:12 +02:00
Matthias Bisping	8b15ac6df4	docstring update	2022-03-29 23:57:09 +02:00
Matthias Bisping	e9489287bd	support for array prediction format	2022-03-29 23:56:22 +02:00
Matthias Bisping	15c0b73034	support for different prediction formats	2022-03-29 23:41:43 +02:00
Matthias Bisping	7a64af156b	refactoring	2022-03-29 22:59:01 +02:00
Matthias Bisping	60617fd622	added formatter to pipeline	2022-03-29 22:47:54 +02:00
Matthias Bisping	ade318c7b7	made classifier accept tupls of images in addition to np.arrays; added pipeline (wip)	2022-03-29 22:00:34 +02:00
Matthias Bisping	3339ed2eab	removed unneeded adapter derivatives and made estimator adapter abstract base class to normal class	2022-03-29 20:44:26 +02:00
Matthias Bisping	7340fb6dda	replaced string keys for metadata fields with enum members	2022-03-29 20:29:44 +02:00
Matthias Bisping	358d7ecd91	restructuring of modules	2022-03-29 20:02:40 +02:00
Matthias Bisping	d33a882d65	removed obsolete code	2022-03-29 19:54:14 +02:00
Matthias Bisping	06adedac57	reimplemented model loader logic and moved base weights into mlflow run dir	2022-03-29 19:50:43 +02:00
Matthias Bisping	edbc5c3f84	redoing model loading design	2022-03-29 18:21:14 +02:00
Matthias Bisping	f60bafd007	redoing model loading design	2022-03-29 17:25:06 +02:00
Matthias Bisping	a1c7dd4a8d	added identity preprocessor; changed default preprocessor to idenitity	2022-03-29 11:40:58 +02:00
Matthias Bisping	6b58756103	refactoring of mlflow model loader	2022-03-29 11:02:43 +02:00
Matthias Bisping	3b4c2a40b2	added patched test for mlflow model loader	2022-03-28 21:51:21 +02:00
Matthias Bisping	c06905625d	added model loader interface, model loader mock and mlflow model loader (the latter so far not tested)	2022-03-28 21:22:35 +02:00
Matthias Bisping	d44622dddc	test parametrization changed	2022-03-28 19:52:24 +02:00
Matthias Bisping	3c6dfed508	made input size adjustable via test fixture	2022-03-28 19:22:31 +02:00
Matthias Bisping	f18e183ab0	added type hint	2022-03-28 18:54:28 +02:00
Matthias Bisping	86f2abc553	renaming	2022-03-28 18:52:39 +02:00
Matthias Bisping	f0a8f2224c	refactoring	2022-03-28 18:50:18 +02:00
Matthias Bisping	9bf1dcbe1d	removed obsolete import	2022-03-28 18:31:09 +02:00
Matthias Bisping	9ce7b6e6da	refactoring	2022-03-28 18:30:51 +02:00
Matthias Bisping	e818b05472	applied black	2022-03-28 16:39:34 +02:00
Matthias Bisping	b818ee4724	fixed misaligned metadata and images	2022-03-28 16:38:46 +02:00
Julius Unverfehrt	9461be29d5	add ParsablePDFImageExtractor test	2022-03-28 15:42:54 +02:00
Julius Unverfehrt	2631eb5c0f	add metadata fixture	2022-03-28 12:05:07 +02:00
Matthias Bisping	643ab99bd3	added parsable pdf image extractor	2022-03-28 11:27:05 +02:00
Matthias Bisping	e0ab365bb9	list -> generator	2022-03-28 00:05:37 +02:00
Matthias Bisping	48737d9439	added extractor classifier	2022-03-28 00:01:19 +02:00
Matthias Bisping	a5147c9a58	added image extractor interface and mock	2022-03-27 23:05:27 +02:00
Matthias Bisping	4c939464b0	renaming	2022-03-27 22:59:28 +02:00
Matthias Bisping	334dc79f7e	refactoring	2022-03-27 18:13:58 +02:00
Matthias Bisping	9d58ae714f	renaming	2022-03-27 17:55:01 +02:00
Matthias Bisping	0f811bdc56	removed unnecessary kwarg	2022-03-27 01:24:29 +01:00
Matthias Bisping	d11333981f	applied black	2022-03-27 01:21:12 +01:00
Matthias Bisping	4fcd1e79d3	removed obsolete code; added missing __init__ for predictor	2022-03-27 01:20:03 +01:00
Matthias Bisping	5c5d132d7f	fixed batching issue in prediction monkey patch by introducinbg an output generator, that yields the expected predictions	2022-03-27 01:13:28 +01:00
Matthias Bisping	0f9510906d	refactoring; added predictor; mocking of predict function is broken: fixing next commit	2022-03-26 21:19:02 +01:00
Matthias Bisping	6343229c1e	added chunk_iterable tests	2022-03-26 20:24:59 +01:00
Matthias Bisping	7d21b0a585	refactoring	2022-03-26 19:54:18 +01:00
Matthias Bisping	364111db89	preprocessor refactoring	2022-03-26 19:38:34 +01:00
Matthias Bisping	ea298dacfa	renaming	2022-03-26 19:27:37 +01:00
Matthias Bisping	373c619b0c	formatting	2022-03-26 19:24:34 +01:00
Matthias Bisping	8aa0717007	added image-tensor conversion logic	2022-03-26 19:24:15 +01:00
Matthias Bisping	a3215e0bc3	renaming of service estimator to estimator	2022-03-25 18:24:05 +01:00
Matthias Bisping	c64bff0843	renaming of service estimator to estimator	2022-03-25 18:20:44 +01:00
Matthias Bisping	dd18087261	restructuring of modules	2022-03-25 18:18:17 +01:00
Matthias Bisping	d97b477208	added estimator preprocessor and removed adapter and adapter patch	2022-03-25 18:09:06 +01:00
Matthias Bisping	981d7816a0	refactoring: replaced estimator adapter with monkeypatch	2022-03-25 17:58:34 +01:00
Matthias Bisping	2e36a9d46d	added type hint	2022-03-25 16:28:17 +01:00
Matthias Bisping	03f269c2d7	fixed incorrect pycharme-refactoring	2022-03-25 16:28:00 +01:00
Matthias Bisping	6853d862ed	added comment motivating the implementation of the predict function of the adapter patch	2022-03-25 15:02:02 +01:00
Matthias Bisping	31591bef0f	suppress tf-internal deprication warning	2022-03-25 14:56:47 +01:00
Matthias Bisping	7834a65ff5	added keras estimator wrapper	2022-03-25 14:46:04 +01:00
Matthias Bisping	8b7293be09	introduced estimator-adapter and estimator-adapter-patch	2022-03-25 13:35:03 +01:00
Matthias Bisping	9c9070e8bf	refactoring	2022-03-25 12:24:23 +01:00
Matthias Bisping	e8fb01b4b7	formatting	2022-03-25 11:49:02 +01:00
Matthias Bisping	41f0cc8a41	estimator + model label mapping	2022-03-25 11:42:31 +01:00
Matthias Bisping	ee959346b7	refactoring: estimator + model	2022-03-25 11:23:07 +01:00
				`@ -0,0 +1 @@`
				<mxfile host="app.diagrams.net" modified="2022-03-17T15:35:10.371Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36" etag="b-CbBXg6FXQ9T3Px-oLc" version="17.1.1" type="device"><diagram id="tS3WR_Pr6QhNVK3FqSUP" name="Page-1">1ZZRT6QwEMc/DY8mQHdRX93z9JLbmNzGmNxbQ0daLQzpDrL46a/IsCzinneJcd0XaP+dtsN/fkADscg3V06WeokKbBCHahOIb0Ecnydzf22FphPmyXknZM6oTooGYWWegcWQ1cooWI8CCdGSKcdiikUBKY006RzW47B7tONdS5nBRFil0k7VO6NId+rZPBz0azCZ7neOQh7JZR/MwlpLhfWOJC4DsXCI1LXyzQJs613vSzfv+57RbWIOCvqXCZqW9PBref27aZ7xsQ5vTn/cnvAqT9JW/MCwJuNzR8dZU9Nb4bAqFLSrhYG4qLUhWJUybUdrX3uvacqt70W+yeuCI9jsTTja2uDxAcyBXONDeILonWN04hn366EQUR+jd4qQsCa59tl26cEe32CH/sOt+TueoCONGRbS/kQs2YkHIGoYbFkRvuUTqAmFr1zyu2LlUvhLdjG/HtJlQO/VfOq6AyvJPI3z+HAL4wlwpbp/2V0qODxzUTJmLjo4c8nEkxaWFXcLLPzt4ithKI4BQzHBMOc/l8UvAeLrj9/hQTw9NhBnxwDibB+IB+ZvdvZ5/PnucAx6Gds5S4rLPw==</diagram></mxfile>