107 changed files with 2707 additions and 38581 deletions
--- a/.dvc/config
+++ b/.dvc/config
@ -5,4 +5,4 @@
    url = ssh://vector.iqser.com/research/image-prediction/
    port = 22
 ['remote "azure_remote"']
-    url = azure://image-classification-dvc/
+    url = azure://ic-sa-dvc/
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -1,51 +1,8 @@
 include:
  - project: "Gitlab/gitlab"
-    ref: main
-    file: "/ci-templates/research/dvc.gitlab-ci.yml"
-  - project: "Gitlab/gitlab"
-    ref: main
-    file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
+    ref: 0.2.6
+    file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"

 variables:
  NEXUS_PROJECT_DIR: red
  IMAGENAME: "${CI_PROJECT_NAME}"
-  INTEGRATION_TEST_FILE: "${CI_PROJECT_ID}.pdf"
-  FF_USE_FASTZIP: "true" # enable fastzip - a faster zip implementation that also supports level configuration.
-  ARTIFACT_COMPRESSION_LEVEL: default # can also be set to fastest, fast, slow and slowest. If just enabling fastzip is not enough try setting this to fastest or fast.
-  CACHE_COMPRESSION_LEVEL: default # same as above, but for caches
-  # TRANSFER_METER_FREQUENCY: 5s # will display transfer progress every 5 seconds for artifacts and remote caches. For debugging purposes.
-
-stages:
-  - data
-  - setup
-  - tests
-  - sonarqube
-  - versioning
-  - build
-  - integration-tests
-  - release
-
-docker-build:
-  extends: .docker-build
-  needs:
-    - job: dvc-pull
-      artifacts: true
-    - !reference [.needs-versioning, needs] # leave this line as is
-  
-###################
-# INTEGRATION TESTS
-trigger-integration-tests:
-  extends: .integration-tests
-  # ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
-  # needs:
-  #   - job: docker-build::model_name
-  #     artifacts: true
-  rules:
-    - when: never
-
-#########
-# RELEASE
-release:
-  extends: .release
-  needs:
-    - !reference [.needs-versioning, needs] # leave this line as is
--- a/.python-version
+++ b/.python-version
@ -1 +1 @@
-3.10
+3.8.13
--- a/59
+++ b/59
@ -1,17 +1,11 @@
-FROM python:3.10-slim AS builder
-
-ARG GITLAB_USER
-ARG GITLAB_ACCESS_TOKEN
+FROM python:3.8

+ARG USERNAME
+ARG TOKEN
 ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
 ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
-
 ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
 ARG POETRY_SOURCE_REF_RED=gitlab-red
-
-ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
-ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
-
 ARG VERSION=dev

 LABEL maintainer="Research <research@knecon.com>"
@ -19,55 +13,26 @@ LABEL version="${VERSION}"

 WORKDIR /app

-###########
-# ENV SETUP
-ENV PYTHONDONTWRITEBYTECODE=true
 ENV PYTHONUNBUFFERED=true
 ENV POETRY_HOME=/opt/poetry
 ENV PATH="$POETRY_HOME/bin:$PATH"

-RUN apt-get update && \
-    apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
 RUN curl -sSL https://install.python-poetry.org | python3 -
-RUN poetry --version

-COPY pyproject.toml poetry.lock ./
+COPY ./data ./data
+COPY ./scripts ./scripts
+COPY ./image_prediction ./image_prediction
+COPY pyproject.toml poetry.lock banner.txt config.yaml ./src ./

-RUN poetry config virtualenvs.create true && \
-    poetry config virtualenvs.in-project true && \
+RUN poetry config virtualenvs.create false && \
    poetry config installer.max-workers 10 && \
    poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
-    poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
+    poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${USERNAME} ${TOKEN} && \
    poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
-    poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
-    poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
-    poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
-    poetry install --without=dev -vv --no-interaction --no-root
-
-###############
-# WORKING IMAGE
-FROM python:3.10-slim
-
-WORKDIR /app
-
-# COPY SOURCE CODE FROM BUILDER IMAGE
-COPY --from=builder /app /app
-# COPY BILL OF MATERIALS (BOM)
-COPY bom.json /bom.json
-
-ENV PATH="/app/.venv/bin:$PATH"
-
-###################
-# COPY SOURCE CODE
-COPY ./src ./src
-COPY ./config ./config
-COPY ./data ./data
-COPY banner.txt ./
+    poetry config http-basic.${POETRY_SOURCE_REF_RED} ${USERNAME} ${TOKEN} && \
+    poetry install --without=test -vv --no-interaction --no-root

 EXPOSE 5000
 EXPOSE 8080

-CMD [ "python", "src/serve.py"]
+CMD [ "python", "serve.py"]
--- a/9
+++ b/9
@ -1,4 +1,4 @@
-FROM python:3.10
+FROM python:3.8

 ARG USERNAME
 ARG TOKEN
@ -20,10 +20,9 @@ ENV PATH="$POETRY_HOME/bin:$PATH"
 RUN curl -sSL https://install.python-poetry.org | python3 -

 COPY ./data ./data
+COPY ./image_prediction ./image_prediction
 COPY ./test ./test
-COPY ./config ./config
-COPY ./src ./src
-COPY pyproject.toml poetry.lock banner.txt config.yaml./
+COPY pyproject.toml poetry.lock banner.txt config.yaml ./src ./

 RUN poetry config virtualenvs.create false && \
    poetry config installer.max-workers 10 && \
@ -31,7 +30,7 @@ RUN poetry config virtualenvs.create false && \
    poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${USERNAME} ${TOKEN} && \
    poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
    poetry config http-basic.${POETRY_SOURCE_REF_RED} ${USERNAME} ${TOKEN} && \
-    poetry install --without=dev -vv --no-interaction --no-root
+    poetry install --without=test -vv --no-interaction --no-root

 EXPOSE 5000
 EXPOSE 8080
--- a/bom.json
+++ b/bom.json
--- a/config.yaml
+++ b/config.yaml
@ -0,0 +1,24 @@
+webserver:
+  host: $SERVER_HOST|"127.0.0.1" # webserver address
+  port: $SERVER_PORT|5000 # webserver port
+
+service:
+  logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for service logger
+  verbose: $VERBOSE|False # Whether the service prints document processing progress to stdout (off by default)
+  batch_size: $BATCH_SIZE|16 # Number of images in memory simultaneously
+  mlflow_run_id: $MLFLOW_RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
+
+# These variables control filters that are applied to either images, image metadata or service_estimator predictions.
+# The filter result values are reported in the service responses. For convenience the response to a request contains a
+# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its
+# specified required value.
+filters:
+  image_to_page_quotient: # Image size to page size ratio (ratio of geometric means of areas)
+    min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
+    max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
+
+  image_width_to_height_quotient: # Image width to height ratio
+    min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
+    max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
+
+  min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence
--- a/config/pyinfra.toml
+++ b/config/pyinfra.toml
@ -1,68 +0,0 @@
-
-[asyncio]
-max_concurrent_tasks = 10
-
-[dynamic_tenant_queues]
-enabled = true
-
-[metrics.prometheus]
-enabled = true
-prefix = "redactmanager_image_service"
-
-[tracing]
-enabled = true
-# possible values "opentelemetry" | "azure_monitor" (Excpects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
-type = "azure_monitor" 
-
-[tracing.opentelemetry]
-endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
-service_name = "redactmanager_image_service"
-exporter = "otlp"
-
-[webserver]
-host = "0.0.0.0"
-port = 8080
-
-[rabbitmq]
-host = "localhost"
-port = 5672
-username = ""
-password = ""
-heartbeat = 60
-# Has to be a divider of heartbeat, and shouldn't be too big, since only in these intervals queue interactions happen (like receiving new messages)
-# This is also the minimum time the service needs to process a message
-connection_sleep = 5
-input_queue = "request_queue"
-output_queue = "response_queue"
-dead_letter_queue = "dead_letter_queue"
-
-tenant_event_queue_suffix = "_tenant_event_queue"
-tenant_event_dlq_suffix = "_tenant_events_dlq"
-tenant_exchange_name = "tenants-exchange"
-queue_expiration_time = 300000  # 5 minutes in milliseconds
-
-service_request_queue_prefix = "image_request_queue"
-service_request_exchange_name = "image_request_exchange"
-service_response_exchange_name = "image_response_exchange"
-service_dlq_name = "image_dlq"
-
-[storage]
-backend = "s3"
-
-[storage.s3]
-bucket = "redaction"
-endpoint = "http://127.0.0.1:9000"
-key = ""
-secret = ""
-region = "eu-central-1"
-
-[storage.azure]
-container = "redaction"
-connection_string = ""
-
-[storage.tenant_server]
-public_key = ""
-endpoint =  "http://tenant-user-management:8081/internal-api/tenants"
-
-[kubernetes]
-pod_name = "test_pod"
--- a/config/settings.toml
+++ b/config/settings.toml
@ -1,42 +0,0 @@
-[logging]
-level = "INFO"
-
-[service]
-# Print document processing progress to stdout
-verbose = false
-batch_size = 6
-image_stiching_tolerance = 1  # in pixels
-mlflow_run_id = "fabfb1f192c745369b88cab34471aba7"
-
-# These variables control filters that are applied to either images, image metadata or service_estimator predictions.
-# The filter result values are reported in the service responses. For convenience the response to a request contains a
-# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its
-# specified required value.
-[filters.confidence]
-# Minimum permissible prediction confidence
-min = 0.5
-
-# Image size to page size ratio (ratio of geometric means of areas)
-[filters.image_to_page_quotient]
-min = 0.05
-max = 0.75
-
-[filters.is_scanned_page]
-# Minimum permissible image to page ratio tolerance for a page to be considered scanned.
-# This is only used for filtering small images on scanned pages and is applied before processing the image, therefore
-# superseding the image_to_page_quotient filter that only applies a tag to the image after processing.
-tolerance = 0
-
-# Image width to height ratio
-[filters.image_width_to_height_quotient]
-min = 0.1
-max = 10
-
-# put class specific filters here ['signature', 'formula', 'logo']
-[filters.overrides.signature.image_to_page_quotient]
-max = 0.4
-
-[filters.overrides.logo.image_to_page_quotient]
-min = 0.06
-
-
--- a/src/image_prediction/init.py
+++ b/src/image_prediction/init.py
--- a/src/image_prediction/classifier/init.py
+++ b/src/image_prediction/classifier/init.py
--- a/src/image_prediction/classifier/classifier.py
+++ b/src/image_prediction/classifier/classifier.py
--- a/src/image_prediction/classifier/image_classifier.py
+++ b/src/image_prediction/classifier/image_classifier.py
--- a/src/image_prediction/compositor/init.py
+++ b/src/image_prediction/compositor/init.py
--- a/src/image_prediction/compositor/compositor.py
+++ b/src/image_prediction/compositor/compositor.py
--- a/image_prediction/config.py
+++ b/image_prediction/config.py
@ -0,0 +1,46 @@
+"""Implements a config object with dot-indexing syntax."""
+
+
+from envyaml import EnvYAML
+
+from image_prediction.locations import CONFIG_FILE
+
+
+def _get_item_and_maybe_make_dotindexable(container, item):
+    """Look up ``item`` in ``container`` and make dict values dot-indexable.
+
+    Args:
+        container: Any object supporting ``container[item]``.
+        item: The key to look up.
+
+    Returns:
+        A ``DotIndexable`` wrapping the value if the value is a ``dict``, otherwise the value itself.
+
+    Raises:
+        Whatever ``container[item]`` raises for a missing key (``KeyError`` for a ``dict``).
+    """
+    ret = container[item]
+    return DotIndexable(ret) if isinstance(ret, dict) else ret
+
+
+class _MissingKeyError(AttributeError, KeyError):
+    """Raised for a missing key that was requested via attribute access.
+
+    It is an ``AttributeError`` so that ``hasattr()``, ``getattr(obj, name, default)`` and the attribute probes
+    done by ``copy``/``pickle`` treat the name as absent, and a ``KeyError`` so that callers which already catch
+    ``KeyError`` around dot access keep working.
+    """
+
+
+class DotIndexable:
+    """Wrapper that exposes the keys of a mapping as attributes (``obj.a.b``) as well as via ``obj["a"]``.
+
+    Nested dicts are wrapped on access, so lookups can be chained. The wrapped mapping is stored in the
+    instance attribute ``x``; a key with the same name as a real attribute or method of this class
+    (e.g. ``x`` or ``get``) is only reachable through indexing, not through dot access.
+    """
+
+    def __init__(self, x):
+        self.x = x
+
+    def get(self, item, default=None):
+        """Return the (possibly wrapped) value for ``item``, or ``default`` if the key is missing."""
+        try:
+            return _get_item_and_maybe_make_dotindexable(self.x, item)
+        except KeyError:
+            return default
+
+    def __getattr__(self, item):
+        # __getattr__ only runs after normal lookup failed. If "x" itself is what is missing (instance created
+        # without __init__, e.g. while being reconstructed by copy/pickle), reading self.x below would re-enter
+        # this method forever, so fail fast instead.
+        if item == "x":
+            raise AttributeError(item)
+        try:
+            return _get_item_and_maybe_make_dotindexable(self.x, item)
+        except KeyError as err:
+            # Translate to an error that honours the attribute protocol but is still a KeyError.
+            raise _MissingKeyError(item) from err
+
+    def __repr__(self):
+        return self.x.__repr__()
+
+    def __getitem__(self, item):
+        # Index access goes straight to the mapping, so a missing key raises the mapping's own KeyError.
+        return _get_item_and_maybe_make_dotindexable(self.x, item)
+
+
+class Config:
+    """Configuration object backed by a YAML file that is loaded through ``EnvYAML``.
+
+    Top-level keys can be read as attributes (``config.service``) or by indexing (``config["service"]``); dict
+    values are wrapped so that nested keys can be reached with dot syntax. A key that is not present in the
+    configuration yields ``None`` rather than raising.
+    """
+
+    def __init__(self, config_path):
+        self.__config = EnvYAML(config_path)
+
+    def __getattr__(self, item):
+        # __getattr__ only runs after normal lookup failed. Two kinds of names must not be answered from the
+        # configuration:
+        #  * the (name-mangled) storage attribute itself: if it is missing, reading self.__config would
+        #    re-enter this method without bound;
+        #  * dunder names: these are protocol probes (hasattr, copy, pickle, ...) and returning None for them
+        #    makes the probe believe the hook exists.
+        if item == "_Config__config" or (item.startswith("__") and item.endswith("__")):
+            raise AttributeError(item)
+        return self[item]
+
+    def __getitem__(self, item):
+        if item in self.__config:
+            return _get_item_and_maybe_make_dotindexable(self.__config, item)
+        # Unknown keys deliberately resolve to None.
+        return None
+
+
+# Module-level configuration instance, built from CONFIG_FILE when this module is imported.
+CONFIG = Config(CONFIG_FILE)
--- a/src/image_prediction/default_objects.py
+++ b/src/image_prediction/default_objects.py
--- a/src/image_prediction/encoder/init.py
+++ b/src/image_prediction/encoder/init.py
--- a/src/image_prediction/encoder/encoder.py
+++ b/src/image_prediction/encoder/encoder.py
--- a/src/image_prediction/encoder/encoders/init.py
+++ b/src/image_prediction/encoder/encoders/init.py
--- a/src/image_prediction/encoder/encoders/hash_encoder.py
+++ b/src/image_prediction/encoder/encoders/hash_encoder.py
@ -13,7 +13,7 @@ class HashEncoder(Encoder):
        yield from self.encode(images)


-def hash_image(image: Image.Image) -> str:
+def hash_image(image: Image.Image):
    """See: https://stackoverflow.com/a/49692185/3578468"""
    image = image.resize((10, 10), Image.ANTIALIAS)
    image = image.convert("L")
@ -21,6 +21,4 @@ def hash_image(image: Image.Image) -> str:
    avg_pixel = sum(pixel_data) / len(pixel_data)
    bits = "".join(["1" if (px >= avg_pixel) else "0" for px in pixel_data])
    hex_representation = str(hex(int(bits, 2)))[2:][::-1].upper()
-    # Note: For each 4 leading zeros, the hex representation will be shorter by one character.
-    # To ensure that all hashes have the same length, we pad the hex representation with zeros (also see RED-3813).
-    return hex_representation.zfill(25)
+    return hex_representation
--- a/src/image_prediction/estimator/init.py
+++ b/src/image_prediction/estimator/init.py
--- a/src/image_prediction/estimator/adapter/init.py
+++ b/src/image_prediction/estimator/adapter/init.py
--- a/src/image_prediction/estimator/adapter/adapter.py
+++ b/src/image_prediction/estimator/adapter/adapter.py
--- a/src/image_prediction/estimator/adapter/adapters/init.py
+++ b/src/image_prediction/estimator/adapter/adapters/init.py
--- a/src/image_prediction/estimator/preprocessor/init.py
+++ b/src/image_prediction/estimator/preprocessor/init.py
--- a/src/image_prediction/estimator/preprocessor/preprocessor.py
+++ b/src/image_prediction/estimator/preprocessor/preprocessor.py
--- a/src/image_prediction/estimator/preprocessor/preprocessors/init.py
+++ b/src/image_prediction/estimator/preprocessor/preprocessors/init.py
--- a/src/image_prediction/estimator/preprocessor/preprocessors/basic.py
+++ b/src/image_prediction/estimator/preprocessor/preprocessors/basic.py
--- a/src/image_prediction/estimator/preprocessor/preprocessors/identity.py
+++ b/src/image_prediction/estimator/preprocessor/preprocessors/identity.py
--- a/src/image_prediction/estimator/preprocessor/utils.py
+++ b/src/image_prediction/estimator/preprocessor/utils.py
--- a/src/image_prediction/exceptions.py
+++ b/src/image_prediction/exceptions.py
--- a/src/image_prediction/extraction.py
+++ b/src/image_prediction/extraction.py
--- a/src/image_prediction/flask.py
+++ b/src/image_prediction/flask.py
--- a/src/image_prediction/formatter/init.py
+++ b/src/image_prediction/formatter/init.py
--- a/src/image_prediction/formatter/formatter.py
+++ b/src/image_prediction/formatter/formatter.py
--- a/src/image_prediction/formatter/formatters/init.py
+++ b/src/image_prediction/formatter/formatters/init.py
--- a/src/image_prediction/formatter/formatters/camel_case.py
+++ b/src/image_prediction/formatter/formatters/camel_case.py
--- a/src/image_prediction/formatter/formatters/enum.py
+++ b/src/image_prediction/formatter/formatters/enum.py
--- a/src/image_prediction/formatter/formatters/identity.py
+++ b/src/image_prediction/formatter/formatters/identity.py
--- a/src/image_prediction/formatter/formatters/key_formatter.py
+++ b/src/image_prediction/formatter/formatters/key_formatter.py
--- a/src/image_prediction/image_extractor/init.py
+++ b/src/image_prediction/image_extractor/init.py
--- a/src/image_prediction/image_extractor/extractor.py
+++ b/src/image_prediction/image_extractor/extractor.py
--- a/src/image_prediction/image_extractor/extractors/init.py
+++ b/src/image_prediction/image_extractor/extractors/init.py
--- a/src/image_prediction/image_extractor/extractors/mock.py
+++ b/src/image_prediction/image_extractor/extractors/mock.py
--- a/src/image_prediction/image_extractor/extractors/parsable.py
+++ b/src/image_prediction/image_extractor/extractors/parsable.py
@ -3,7 +3,7 @@ import json
 import traceback
 from _operator import itemgetter
 from functools import partial, lru_cache
-from itertools import chain, starmap, filterfalse, tee
+from itertools import chain, starmap, filterfalse
 from operator import itemgetter, truth
 from typing import Iterable, Iterator, List, Union

@ -11,10 +11,9 @@ import fitz
 import numpy as np
 from PIL import Image
 from funcy import merge, pluck, compose, rcompose, remove, keep
-from scipy.stats import gmean

 from image_prediction.config import CONFIG
-from image_prediction.exceptions import InvalidBox
+from image_prediction.exceptions import InvalidBox, BadXref
 from image_prediction.formatter.formatters.enum import EnumFormatter
 from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
 from image_prediction.info import Info
@ -35,7 +34,7 @@ class ParsablePDFImageExtractor(ImageExtractor):
            tolerance: The tolerance in pixels for the distance between images, beyond which they will not be stitched
                together
        """
-        self.doc: fitz.Document = None
+        self.doc: fitz.fitz.Document = None
        self.verbose = verbose
        self.tolerance = tolerance

@ -48,7 +47,7 @@ class ParsablePDFImageExtractor(ImageExtractor):

        yield from image_metadata_pairs

-    def __process_images_on_page(self, page: fitz.Page):
+    def __process_images_on_page(self, page: fitz.fitz.Page):
        metadata = extract_valid_metadata(self.doc, page)
        images = get_images_on_page(self.doc, metadata)

@ -65,13 +64,9 @@ class ParsablePDFImageExtractor(ImageExtractor):

    @staticmethod
    def __filter_valid_images(image_metadata_pairs: Iterable[ImageMetadataPair]) -> Iterator[ImageMetadataPair]:
-        def validate_image_is_not_corrupt(image: Image.Image, metadata: dict):
-            """See RED-5148: Some images are corrupt and cannot be processed by the image classifier. This function
-            filters out such images by trying to resize and convert them to RGB. If this fails, the image is considered
-            corrupt and is dropped.
-            TODO: find cleaner solution
-            """
+        def validate(image: Image.Image, metadata: dict):
            try:
+                # TODO: stand-in heuristic for testing if image is valid => find cleaner solution (RED-5148)
                image.resize((100, 100)).convert("RGB")
                return ImageMetadataPair(image, metadata)
            except (OSError, Exception) as err:
@ -79,41 +74,7 @@ class ParsablePDFImageExtractor(ImageExtractor):
                logger.warning(f"Invalid image encountered. Image metadata:\n{metadata}\n\n{traceback.format_exc()}")
                return None

-        def filter_small_images_on_scanned_pages(image_metadata_pairs) -> Iterable[ImageMetadataPair]:
-            """See RED-9746: Small images on scanned pages should be dropped, so they are not classified. This is a
-            heuristic to filter out images that are too small in relation to the page size if they are on a scanned page.
-
-            The ratio is computed as the geometric mean of the width and height of the image divided by the geometric mean
-            of the width and height of the page. If the ratio is below the threshold, the image is dropped.
-            """
-
-            def image_is_a_scanned_page(image_metadata_pair: ImageMetadataPair) -> bool:
-                tolerance = CONFIG.filters.is_scanned_page.tolerance
-                width_ratio = image_metadata_pair.metadata[Info.WIDTH] / image_metadata_pair.metadata[Info.PAGE_WIDTH]
-                height_ratio = (
-                    image_metadata_pair.metadata[Info.HEIGHT] / image_metadata_pair.metadata[Info.PAGE_HEIGHT]
-                )
-                return width_ratio >= 1 - tolerance and height_ratio >= 1 - tolerance
-
-            def image_fits_geometric_mean_ratio(image_metadata_pair: ImageMetadataPair) -> bool:
-                min_ratio = CONFIG.filters.image_to_page_quotient.min
-                metadatum = image_metadata_pair.metadata
-                image_gmean = gmean([metadatum[Info.WIDTH], metadatum[Info.HEIGHT]])
-                page_gmean = gmean([metadatum[Info.PAGE_WIDTH], metadatum[Info.PAGE_HEIGHT]])
-                ratio = image_gmean / page_gmean
-                return ratio >= min_ratio
-
-            pairs, pairs_copy = tee(image_metadata_pairs)
-
-            if any(map(image_is_a_scanned_page, pairs_copy)):
-                logger.debug("Scanned page detected, filtering out small images ...")
-                return filter(image_fits_geometric_mean_ratio, pairs)
-            else:
-                return pairs
-
-        image_metadata_pairs = filter_small_images_on_scanned_pages(image_metadata_pairs)
-
-        return filter(truth, starmap(validate_image_is_not_corrupt, image_metadata_pairs))
+        return filter(truth, starmap(validate, image_metadata_pairs))


 def extract_pages(doc, page_range):
@ -130,12 +91,13 @@ def get_images_on_page(doc, metadata):
    yield from images


-def extract_valid_metadata(doc: fitz.Document, page: fitz.Page):
-    metadata = get_metadata_for_images_on_page(page)
-    metadata = filter_valid_metadata(metadata)
-    metadata = add_alpha_channel_info(doc, metadata)
-
-    return list(metadata)
+def extract_valid_metadata(doc: fitz.fitz.Document, page: fitz.fitz.Page):
+    return compose(
+        list,
+        partial(add_alpha_channel_info, doc),
+        filter_valid_metadata,
+        get_metadata_for_images_on_page,
+    )(page)


 def get_metadata_for_images_on_page(page: fitz.Page):
@ -191,7 +153,7 @@ def xref_to_image(doc, xref) -> Union[Image.Image, None]:
        return


-def convert_pixmap_to_array(pixmap: fitz.Pixmap):
+def convert_pixmap_to_array(pixmap: fitz.fitz.Pixmap):
    array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
    array = _normalize_channels(array)
    return array
@ -210,6 +172,7 @@ def _normalize_channels(array: np.ndarray):


 def get_image_metadata(image_info):
+
    xref, coords = itemgetter("xref", "bbox")(image_info)
    x1, y1, x2, y2 = map(rounder, coords)

@ -244,11 +207,7 @@ def add_alpha_channel_info(doc, metadata):

@lru_cache(maxsize=None)
 def load_image_handle_from_xref(doc, xref):
-    try:
-        return doc.extract_image(xref)
-    except ValueError:
-        logger.debug(f"Xref {xref} is invalid, skipping extraction ...")
-        return
+    return doc.extract_image(xref)


 rounder = rcompose(round, int)
@ -265,6 +224,7 @@ def get_page_metadata(page):


 def has_alpha_channel(doc, xref):
+
    maybe_image = load_image_handle_from_xref(doc, xref)
    maybe_smask = maybe_image["smask"] if maybe_image else None

--- a/src/image_prediction/info.py
+++ b/src/image_prediction/info.py
--- a/src/image_prediction/label_mapper/init.py
+++ b/src/image_prediction/label_mapper/init.py
--- a/src/image_prediction/label_mapper/mapper.py
+++ b/src/image_prediction/label_mapper/mapper.py
--- a/src/image_prediction/label_mapper/mappers/init.py
+++ b/src/image_prediction/label_mapper/mappers/init.py
--- a/src/image_prediction/label_mapper/mappers/numeric.py
+++ b/src/image_prediction/label_mapper/mappers/numeric.py
--- a/src/image_prediction/label_mapper/mappers/probability.py
+++ b/src/image_prediction/label_mapper/mappers/probability.py
--- a/image_prediction/locations.py
+++ b/image_prediction/locations.py
@ -0,0 +1,16 @@
+"""Defines constant paths relative to the module root path."""
+
+from pathlib import Path
+
+# Directory that contains this module file (symlinks resolved).
+MODULE_DIR = Path(__file__).resolve().parents[0]
+# Parent of the module directory; every path below is anchored here.
+PACKAGE_ROOT_DIR = MODULE_DIR.parents[0]
+
+# Files expected directly in the package root.
+CONFIG_FILE = PACKAGE_ROOT_DIR / "config.yaml"
+BANNER_FILE = PACKAGE_ROOT_DIR / "banner.txt"
+
+DATA_DIR = PACKAGE_ROOT_DIR / "data"
+# NOTE(review): kept as a str while the other constants are Path objects — presumably the consumer
+# needs a plain string; confirm before changing the type.
+MLRUNS_DIR = str(DATA_DIR / "mlruns")
+
+# Test locations under <package root>/test.
+TEST_DIR = PACKAGE_ROOT_DIR / "test"
+TEST_DATA_DIR = TEST_DIR / "data"
+TEST_DATA_DIR_DVC = TEST_DIR / "data.dvc"
--- a/src/image_prediction/model_loader/init.py
+++ b/src/image_prediction/model_loader/init.py
--- a/src/image_prediction/model_loader/database/init.py
+++ b/src/image_prediction/model_loader/database/init.py
--- a/src/image_prediction/model_loader/database/connector.py
+++ b/src/image_prediction/model_loader/database/connector.py
--- a/src/image_prediction/model_loader/database/connectors/init.py
+++ b/src/image_prediction/model_loader/database/connectors/init.py
--- a/src/image_prediction/model_loader/database/connectors/mock.py
+++ b/src/image_prediction/model_loader/database/connectors/mock.py
--- a/src/image_prediction/model_loader/loader.py
+++ b/src/image_prediction/model_loader/loader.py
--- a/src/image_prediction/model_loader/loaders/init.py
+++ b/src/image_prediction/model_loader/loaders/init.py
--- a/src/image_prediction/model_loader/loaders/mlflow.py
+++ b/src/image_prediction/model_loader/loaders/mlflow.py
--- a/src/image_prediction/pipeline.py
+++ b/src/image_prediction/pipeline.py
@ -1,10 +1,8 @@
 import os
 from functools import lru_cache, partial
 from itertools import chain, tee
-from typing import Iterable, Any

 from funcy import rcompose, first, compose, second, chunks, identity, rpartial
-from kn_utils.logging import logger
 from tqdm import tqdm

 from image_prediction.config import CONFIG
@ -23,7 +21,6 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

@lru_cache(maxsize=None)
 def load_pipeline(**kwargs):
-    logger.info(f"Loading pipeline with kwargs: {kwargs}")
    model_loader = get_mlflow_model_loader(MLRUNS_DIR)
    model_identifier = CONFIG.service.mlflow_run_id

@ -55,7 +52,7 @@ class Pipeline:
        join = compose(starlift(lambda prd, rpr, mdt: {"classification": prd, **mdt, "representation": rpr}), star(zip))

        #                       />--classify--\
-        # --extract-->--split--+->--encode---->+--join-->reformat-->filter_duplicates
+        # --extract-->--split--+->--encode---->+--join-->reformat
        #                       \>--identity--/

        self.pipe = rcompose(
@ -64,7 +61,6 @@ class Pipeline:
            pairwise_apply(classify, represent, identity),  # ... apply functions to the streams pairwise
            join,  # ... the streams by zipping
            reformat,  # ... the items
-            filter_duplicates,  # ... filter out duplicate images
        )

    def __call__(self, pdf: bytes, page_range: range = None):
@ -74,32 +70,3 @@ class Pipeline:
            unit=" images",
            disable=not self.verbose,
        )
-
-
-def filter_duplicates(metadata: Iterable[dict[str, Any]]) -> Iterable[dict[str, Any]]:
-    """Filter out duplicate images from the `position` (image coordinates) and `page`, preferring the one with
-    `allPassed` set to True.
-    See RED-10765 (RM-241): Removed redactions reappear for why this is necessary.
-    """
-    keep = dict()
-    for image_meta in metadata:
-        key: tuple[int, int, int, int, int] = (
-            image_meta["position"]["x1"],
-            image_meta["position"]["x2"],
-            image_meta["position"]["y1"],
-            image_meta["position"]["y2"],
-            image_meta["position"]["pageNumber"],
-        )
-        if key in keep:
-            logger.warning(
-                f"Duplicate image found: x1={key[0]}, x2={key[1]}, y1={key[2]}, y2={key[3]}, pageNumber={key[4]}"
-            )
-            if image_meta["filters"]["allPassed"]:
-                logger.warning("Setting the image with allPassed flag set to True")
-                keep[key] = image_meta
-            else:
-                logger.warning("Keeping the previous image since the current image has allPassed flag set to False")
-        else:
-            keep[key] = image_meta
-
-    yield from keep.values()
--- a/src/image_prediction/redai_adapter/init.py
+++ b/src/image_prediction/redai_adapter/init.py
--- a/src/image_prediction/redai_adapter/efficient_net_wrapper.py
+++ b/src/image_prediction/redai_adapter/efficient_net_wrapper.py
--- a/src/image_prediction/redai_adapter/mlflow.py
+++ b/src/image_prediction/redai_adapter/mlflow.py
--- a/src/image_prediction/redai_adapter/model.py
+++ b/src/image_prediction/redai_adapter/model.py
--- a/src/image_prediction/redai_adapter/model_wrapper.py
+++ b/src/image_prediction/redai_adapter/model_wrapper.py
--- a/src/image_prediction/stitching/init.py
+++ b/src/image_prediction/stitching/init.py
--- a/src/image_prediction/stitching/grouping.py
+++ b/src/image_prediction/stitching/grouping.py
--- a/src/image_prediction/stitching/merging.py
+++ b/src/image_prediction/stitching/merging.py
--- a/src/image_prediction/stitching/split_mapper.py
+++ b/src/image_prediction/stitching/split_mapper.py
--- a/src/image_prediction/stitching/stitching.py
+++ b/src/image_prediction/stitching/stitching.py
--- a/src/image_prediction/stitching/utils.py
+++ b/src/image_prediction/stitching/utils.py
--- a/src/image_prediction/transformer/init.py
+++ b/src/image_prediction/transformer/init.py
--- a/src/image_prediction/transformer/transformer.py
+++ b/src/image_prediction/transformer/transformer.py
--- a/src/image_prediction/transformer/transformers/init.py
+++ b/src/image_prediction/transformer/transformers/init.py
--- a/src/image_prediction/transformer/transformers/coordinate/init.py
+++ b/src/image_prediction/transformer/transformers/coordinate/init.py
--- a/src/image_prediction/transformer/transformers/coordinate/coordinate_transformer.py
+++ b/src/image_prediction/transformer/transformers/coordinate/coordinate_transformer.py
--- a/src/image_prediction/transformer/transformers/coordinate/fitz.py
+++ b/src/image_prediction/transformer/transformers/coordinate/fitz.py
--- a/src/image_prediction/transformer/transformers/coordinate/fpdf.py
+++ b/src/image_prediction/transformer/transformers/coordinate/fpdf.py
--- a/src/image_prediction/transformer/transformers/coordinate/pdfnet.py
+++ b/src/image_prediction/transformer/transformers/coordinate/pdfnet.py
--- a/src/image_prediction/transformer/transformers/response.py
+++ b/src/image_prediction/transformer/transformers/response.py
@ -1,8 +1,13 @@
+import json
 import math
-from dynaconf import Dynaconf
+import os
+from functools import lru_cache
 from operator import itemgetter

+from funcy import first
+
 from image_prediction.config import CONFIG
+from image_prediction.exceptions import ParsingError
 from image_prediction.transformer.transformer import Transformer
 from image_prediction.utils import get_logger

@ -27,22 +32,21 @@ def build_image_info(data: dict) -> dict:
    geometric_quotient = round(compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1), 4)

    min_image_to_page_quotient_breached = bool(
-        geometric_quotient < get_class_specific_filter_value(label, CONFIG, "image_to_page_quotient", "min")
+        geometric_quotient < get_class_specific_min_image_to_page_quotient(label)
    )
    max_image_to_page_quotient_breached = bool(
-        geometric_quotient > get_class_specific_filter_value(label, CONFIG, "image_to_page_quotient", "max")
+        geometric_quotient > get_class_specific_max_image_to_page_quotient(label)
    )

    min_image_width_to_height_quotient_breached = bool(
-        width / height < get_class_specific_filter_value(label, CONFIG, "image_width_to_height_quotient", "min")
+        width / height < get_class_specific_min_image_width_to_height_quotient(label)
    )
    max_image_width_to_height_quotient_breached = bool(
-        width / height > get_class_specific_filter_value(label, CONFIG, "image_width_to_height_quotient", "max")
+        width / height > get_class_specific_max_image_width_to_height_quotient(label)
    )

    min_confidence_breached = bool(
-        max(classification["probabilities"].values())
-        < get_class_specific_filter_value(label, CONFIG, "confidence", "min")
+        max(classification["probabilities"].values()) < get_class_specific_min_classification_confidence(label)
    )

    image_info = {
@ -86,15 +90,65 @@ def compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1):
    return image_area_sqrt / page_area_sqrt


-def get_class_specific_filter_value(label: str, settings: Dynaconf, filter_type: str, bound: str = None):
-    try:
-        value = (
-            settings.filters.overrides[label][filter_type][bound]
-            if bound
-            else settings.filters.overrides[label][filter_type]
-        )
-        logger.warning(f"Using {label=} specific {bound=} {filter_type=} {value=}.")
-    except KeyError:
-        value = settings.filters[filter_type][bound]
+def get_class_specific_min_image_to_page_quotient(label, table=None):
+    """Return the minimum image-to-page quotient to apply for class `label`.
+
+    Delegates to `get_class_specific_value` with the "REL_IMAGE_SIZE" prefix and the "min" bound, passing
+    `CONFIG.filters.image_to_page_quotient.min` as the fallback value. `table` is forwarded unchanged.
+    """
+    default = CONFIG.filters.image_to_page_quotient.min
+    return get_class_specific_value("REL_IMAGE_SIZE", label, "min", default, table=table)

-    return value
+
+def get_class_specific_max_image_to_page_quotient(label, table=None):
+    """Return the maximum image-to-page quotient to apply for class `label`.
+
+    Delegates to `get_class_specific_value` with the "REL_IMAGE_SIZE" prefix and the "max" bound, passing
+    `CONFIG.filters.image_to_page_quotient.max` as the fallback value. `table` is forwarded unchanged.
+    """
+    default = CONFIG.filters.image_to_page_quotient.max
+    return get_class_specific_value("REL_IMAGE_SIZE", label, "max", default, table=table)
+
+
+def get_class_specific_min_image_width_to_height_quotient(label, table=None):
+    """Return the minimum width-to-height quotient to apply for class `label`.
+
+    Delegates to `get_class_specific_value` with the "IMAGE_FORMAT" prefix and the "min" bound, passing
+    `CONFIG.filters.image_width_to_height_quotient.min` as the fallback value. `table` is forwarded unchanged.
+    """
+    default = CONFIG.filters.image_width_to_height_quotient.min
+    return get_class_specific_value("IMAGE_FORMAT", label, "min", default, table=table)
+
+
+def get_class_specific_max_image_width_to_height_quotient(label, table=None):
+    """Return the maximum width-to-height quotient to apply for class `label`.
+
+    Delegates to `get_class_specific_value` with the "IMAGE_FORMAT" prefix and the "max" bound, passing
+    `CONFIG.filters.image_width_to_height_quotient.max` as the fallback value. `table` is forwarded unchanged.
+    """
+    default = CONFIG.filters.image_width_to_height_quotient.max
+    return get_class_specific_value("IMAGE_FORMAT", label, "max", default, table=table)
+
+
+def get_class_specific_min_classification_confidence(label, table=None):
+    """Return the minimum classification confidence to apply for class `label`.
+
+    Delegates to `get_class_specific_value` with the "CONFIDENCE" prefix and the "min" bound, passing
+    `CONFIG.filters.min_confidence` as the fallback value. `table` is forwarded unchanged.
+    """
+    default = CONFIG.filters.min_confidence
+    return get_class_specific_value("CONFIDENCE", label, "min", default, table=table)
+
+
+def get_class_specific_value(prefix, label, bound, fallback_value, table=None):
+    """Look up a per-class threshold override, falling back to a default.
+
+    The override table is obtained from `parse_env_var(prefix, table=table)` and is read as a mapping from class
+    label to a mapping with "min" and / or "max" entries.
+
+    Args:
+        prefix: Name of the variable that holds the overrides; forwarded to `parse_env_var`.
+        label: Class label whose override is requested.
+        bound: Which bound to read; must be "min" or "max".
+        fallback_value: Value returned when there is no override for `label` / `bound`.
+        table: Optional lookup table forwarded to `parse_env_var`.
+
+    Returns:
+        The class-specific override if one is set (including 0), otherwise `fallback_value`.
+
+    Raises:
+        AssertionError: If `bound` is neither "min" nor "max".
+    """
+    assert bound in ["min", "max"]
+
+    threshold_map = parse_env_var(prefix, table=table) or {}
+    value = threshold_map.get(label, {}).get(bound)
+
+    # Test against None instead of relying on truthiness: 0 / 0.0 is a legitimate threshold and must not be
+    # silently replaced by the global default.
+    if value is None:
+        return fallback_value
+
+    logger.debug(f"Using class '{label}' specific {bound} {prefix.lower().replace('_', '-')} value.")
+    return value
+
+
+@lru_cache(maxsize=None)
+def parse_env_var(prefix, table=None):
+    """Read and decode the JSON override table stored under `prefix`.
+
+    Results are memoised with `lru_cache`, so `table` must be hashable when given (a plain `dict` is not), and
+    later changes to the environment are not picked up for an already-seen `prefix`.
+
+    Args:
+        prefix: Exact name of the variable to read.
+        table: Optional mapping to read from instead of `os.environ`.
+
+    Returns:
+        The decoded JSON object as a dict, or None if the variable is missing, is not valid JSON, or does not
+        decode to a JSON object.
+    """
+    # Only None selects the process environment; an explicitly passed empty table must not silently fall through
+    # to `os.environ`.
+    if table is None:
+        table = os.environ
+
+    if prefix not in table:
+        return None
+
+    try:
+        parsed = parse_env_var_value(table[prefix])
+    except ParsingError as err:
+        logger.warning(err)
+        return None
+
+    # The result is indexed by label via `.get`, so anything but a JSON object is unusable. Handle it like a parse
+    # failure instead of letting the caller crash with an AttributeError.
+    if not isinstance(parsed, dict):
+        logger.warning(f"Ignoring {prefix}: expected a JSON object, got {type(parsed).__name__}.")
+        return None
+
+    return parsed
+
+
+def parse_env_var_value(env_var_value):
+    """Decode `env_var_value` as JSON.
+
+    Raises:
+        ParsingError: If `json.loads` fails for any reason; the original exception is chained as the cause.
+    """
+    try:
+        decoded = json.loads(env_var_value)
+    except Exception as err:
+        raise ParsingError(f"Failed to parse {env_var_value}") from err
+    return decoded
--- a/src/image_prediction/utils.py
+++ b/src/image_prediction/utils.py
--- a/src/image_prediction/utils/init.py
+++ b/src/image_prediction/utils/init.py
--- a/src/image_prediction/utils/banner.py
+++ b/src/image_prediction/utils/banner.py
--- a/src/image_prediction/utils/generic.py
+++ b/src/image_prediction/utils/generic.py
--- a/image_prediction/utils/logger.py
+++ b/image_prediction/utils/logger.py
@ -0,0 +1,27 @@
+import logging
+
+from image_prediction.config import CONFIG
+
+
+def make_logger_getter():
+    """Configure the "imclf" logger and return a zero-argument accessor for it.
+
+    The logger gets a single stream handler with a timestamped format; both the logger and the handler use
+    `CONFIG.service.logging_level`, and propagation to ancestor loggers is switched off.
+
+    Returns:
+        A function that takes no arguments and returns the configured logger.
+    """
+    imclf_logger = logging.getLogger("imclf")
+    imclf_logger.propagate = False
+
+    stream_handler = logging.StreamHandler()
+    stream_handler.setLevel(CONFIG.service.logging_level)
+    stream_handler.setFormatter(
+        logging.Formatter("%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    )
+
+    imclf_logger.addHandler(stream_handler)
+    imclf_logger.setLevel(CONFIG.service.logging_level)
+
+    def get_logger():
+        return imclf_logger
+
+    return get_logger
+
+
+get_logger = make_logger_getter()
--- a/src/image_prediction/utils/pdf_annotation.py
+++ b/src/image_prediction/utils/pdf_annotation.py
@ -56,8 +56,7 @@ def annotate_image(doc, image_info):

 def init():
    PDFNet.Initialize(
-        # "Knecon AG(en.knecon.swiss):OEM:DDA-R::WL+:AMS(20211029):BECC974307DAB4F34B513BC9B2531B24496F6FCB83CD8AC574358A959730B622FABEF5C7"
-        "Knecon AG:OEM:DDA-R::WL+:AMS(20270129):EA5FDFB23C7F36B9C2AE606F4F0D9197DE1FB649119F9730B622FABEF5C7"
+        "Knecon AG(en.knecon.swiss):OEM:DDA-R::WL+:AMS(20211029):BECC974307DAB4F34B513BC9B2531B24496F6FCB83CD8AC574358A959730B622FABEF5C7"
    )


--- a/src/image_prediction/utils/process_wrapping.py
+++ b/src/image_prediction/utils/process_wrapping.py
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,20 +1,18 @@
 [tool.poetry]
 name = "image-classification-service"
-version = "2.17.0"
+version = "1.33.0"
 description = ""
 authors = ["Team Research <research@knecon.com>"]
 readme = "README.md"
-packages = [{ include = "image_prediction", from = "src" }]
+packages = [{ include = "image_prediction" }]

 [tool.poetry.dependencies]
-python = ">=3.10,<3.11"
-# FIXME: This should be recent pyinfra, but the recent protobuf packages are not compatible with tensorflow 2.9.0, also
-#  see RED-9948.
-pyinfra = { version = "3.4.2", source = "gitlab-research" }
-kn-utils = { version = ">=0.4.0", source = "gitlab-research" }
+python = "~3.8"
 dvc = "^2.34.0"
 dvc-ssh = "^2.20.0"
 dvc-azure = "^2.21.2"
+pyinfra = { version = "1.6.0", source = "gitlab-research" }
+kn-utils = { version = "0.1.4", source = "gitlab-research" }
 Flask = "^2.1.1"
 requests = "^2.27.1"
 iteration-utilities = "^0.11.0"
@ -25,25 +23,22 @@ mlflow = "^1.24.0"
 numpy = "^1.22.3"
 tqdm = "^4.64.0"
 pandas = "^1.4.2"
-# FIXME: Our current model significantly changes the prediction behaviour when using newer tensorflow (/ protobuf)
-#  versions which is introduuced by pyinfra updates using newer protobuf versions, see RED-9948.
-tensorflow = "2.9.0"
-protobuf = "^3.20"
+tensorflow = "^2.8.0"
 pytest = "^7.1.0"
-funcy = "^2"
+funcy = "^1.17"
 PyMuPDF = "^1.19.6"
 fpdf = "^1.7.2"
 coverage = "^6.3.2"
 Pillow = "^9.1.0"
 pdf2image = "^1.16.0"
 frozendict = "^2.3.0"
+protobuf = "^3.20.0"
 fsspec = "^2022.11.0"
 PyMonad = "^2.4.0"
 pdfnetpython3 = "9.4.2"
-loguru = "^0.7.0"
-cyclonedx-bom = "^4.5.0"
+loguru = "^0.6.0"

-[tool.poetry.group.dev.dependencies]
+[tool.poetry.group.test.dependencies]
 pytest = "^7.0.1"
 pymonad = "^2.4.0"
 pylint = "^2.17.4"
--- a/scripts/debug/debug.py
+++ b/scripts/debug/debug.py
@ -1,46 +0,0 @@
-"""Script to debug RED-9948. The predictions unexpectedly changed for some images, and we need to understand why."""
-
-import json
-import random
-from pathlib import Path
-
-import numpy as np
-import tensorflow as tf
-from kn_utils.logging import logger
-
-from image_prediction.config import CONFIG
-from image_prediction.pipeline import load_pipeline
-
-
-def process_pdf(pipeline, pdf_path, page_range=None):
-    with open(pdf_path, "rb") as f:
-        logger.info(f"Processing {pdf_path}")
-        predictions = list(pipeline(f.read(), page_range=page_range))
-
-    return predictions
-
-
-def ensure_seeds():
-    seed = 42
-    np.random.seed(seed)
-    random.seed(seed)
-    tf.random.set_seed(seed)
-
-
-def debug_info():
-    devices = tf.config.list_physical_devices()
-    print("Available devices:", devices)
-
-
-if __name__ == "__main__":
-    # For in container debugging, copy the file and adjust the path.
-    debug_file_path = Path(__file__).parents[2] / "test" / "data" / "RED-9948" / "SYNGENTA_EFSA_sanitisation_GFL_v2"
-    ensure_seeds()
-    debug_info()
-
-    pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
-    predictions = process_pdf(pipeline, debug_file_path)
-    # This is the image that has the wrong prediction mentioned in RED-9948. The predictions should inconclusive, and
-    # the flag all passed should be false.
-    predictions = [x for x in predictions if x["representation"] == "FA30F080F0C031CE17E8CF237"]
-    print(json.dumps(predictions, indent=2))
--- a/scripts/docker_build_run.sh
+++ b/scripts/docker_build_run.sh
@ -1,6 +1,6 @@
-docker build -t --platform linux/amd64 image-clsasification-service:$(poetry version -s)-dev \                                                               
+# Build the dev image (tagged with the current poetry version) and, if the build succeeds, run it interactively.
+# NOTE: nothing may follow a line-continuation backslash, otherwise it escapes that character instead of the newline.
+docker build -t image-clsasification-service:$(poetry version -s)-dev \
    -f Dockerfile \
-    --build-arg GITLAB_USER=$GITLAB_USER \
-    --build-arg GITLAB_ACCESS_TOKEN=$GITLAB_ACCESS_TOKEN \
+    --build-arg USERNAME=$GITLAB_USER \
+    --build-arg TOKEN=$GITLAB_ACCESS_TOKEN \
    . && \
 docker run -it --rm image-clsasification-service:$(poetry version -s)-dev
--- a/scripts/run_pipeline.py
+++ b/scripts/run_pipeline.py
@ -3,15 +3,12 @@ import json
 import os
 from glob import glob

-from image_prediction.config import CONFIG
 from image_prediction.pipeline import load_pipeline
 from image_prediction.utils import get_logger
 from image_prediction.utils.pdf_annotation import annotate_pdf

 logger = get_logger()

-logger.setLevel("DEBUG")
-

 def parse_args():
    parser = argparse.ArgumentParser()
@ -38,7 +35,7 @@ def process_pdf(pipeline, pdf_path, page_range=None):


 def main(args):
-    pipeline = load_pipeline(verbose=CONFIG.service.verbose, batch_size=CONFIG.service.batch_size, tolerance=CONFIG.service.image_stiching_tolerance)
+    pipeline = load_pipeline(verbose=True, tolerance=3)

    if os.path.isfile(args.input):
        pdf_paths = [args.input]
--- a/src/image_prediction/config.py
+++ b/src/image_prediction/config.py
@ -1,7 +0,0 @@
-from pathlib import Path
-
-from pyinfra.config.loader import load_settings
-
-from image_prediction.locations import PROJECT_ROOT_DIR
-
-CONFIG = load_settings(root_path=PROJECT_ROOT_DIR, settings_path="config")
--- a/src/image_prediction/locations.py
+++ b/src/image_prediction/locations.py
@ -1,18 +0,0 @@
-"""Defines constant paths relative to the module root path."""
-
-from pathlib import Path
-
-# FIXME: move these paths to config, only depending on 'ROOT_PATH' environment variable.
-MODULE_DIR = Path(__file__).resolve().parents[0]
-PACKAGE_ROOT_DIR = MODULE_DIR.parents[0]
-PROJECT_ROOT_DIR = PACKAGE_ROOT_DIR.parents[0]
-
-CONFIG_FILE = PROJECT_ROOT_DIR / "config" / "settings.toml"
-BANNER_FILE = PROJECT_ROOT_DIR / "banner.txt"
-
-DATA_DIR = PROJECT_ROOT_DIR / "data"
-MLRUNS_DIR = str(DATA_DIR / "mlruns")
-
-TEST_DIR = PROJECT_ROOT_DIR / "test"
-TEST_DATA_DIR = TEST_DIR / "data"
-TEST_DATA_DIR_DVC = TEST_DIR / "data.dvc"
--- a/src/image_prediction/utils/logger.py
+++ b/src/image_prediction/utils/logger.py
@ -1,4 +0,0 @@
-import kn_utils
-
-# TODO: remove this module and use the `get_logger` function from the `kn_utils` package.
-get_logger = kn_utils.get_logger
--- a/src/serve.py
+++ b/src/serve.py
@ -1,15 +1,17 @@
-from sys import stdout
-
-from kn_utils.logging import logger
-from pyinfra.examples import start_standard_queue_consumer
-from pyinfra.queue.callback import make_download_process_upload_callback
-
-from image_prediction.config import CONFIG
+from image_prediction import logger
+from image_prediction.config import Config
+from image_prediction.locations import CONFIG_FILE
 from image_prediction.pipeline import load_pipeline
 from image_prediction.utils.banner import load_banner
 from image_prediction.utils.process_wrapping import wrap_in_process
+from pyinfra import config
+from pyinfra.payload_processing.processor import make_payload_processor
+from pyinfra.queue.queue_manager import QueueManager

-logger.reconfigure(sink=stdout, level=CONFIG.logging.level)
+PYINFRA_CONFIG = config.get_config()
+IMAGE_CONFIG = Config(CONFIG_FILE)
+
+logger.setLevel(PYINFRA_CONFIG.logging_level_root)


 # A component of the processing pipeline (probably tensorflow) does not release allocated memory (see RED-4206).
@ -17,16 +19,18 @@ logger.reconfigure(sink=stdout, level=CONFIG.logging.level)
 # Workaround: Manage Memory with the operating system, by wrapping the processing in a sub-process.
 # FIXME: Find more fine-grained solution or if the problem occurs persistently for python services,
@wrap_in_process
-def process_data(data: bytes, _message: dict) -> list:
-    pipeline = load_pipeline(verbose=CONFIG.service.verbose, batch_size=CONFIG.service.batch_size, tolerance=CONFIG.service.image_stiching_tolerance)
+def process_data(data: bytes) -> list:
+    pipeline = load_pipeline(verbose=IMAGE_CONFIG.service.verbose, batch_size=IMAGE_CONFIG.service.batch_size)
    return list(pipeline(data))


 def main():
    logger.info(load_banner())

-    callback = make_download_process_upload_callback(process_data, CONFIG)
-    start_standard_queue_consumer(callback, CONFIG)
+    process_payload = make_payload_processor(process_data, config=PYINFRA_CONFIG)
+
+    queue_manager = QueueManager(PYINFRA_CONFIG)
+    queue_manager.start_consuming(process_payload)


 if __name__ == "__main__":
--- a/test/conftest.py
+++ b/test/conftest.py
@ -1,3 +1,10 @@
+import logging
+
+import pytest
+
+from image_prediction.utils import get_logger
+
+
 pytest_plugins = [
    "test.fixtures.extractor",
    "test.fixtures.image",
@ -10,5 +17,14 @@ pytest_plugins = [
    "test.fixtures.parameters",
    "test.fixtures.pdf",
    "test.fixtures.target",
-    "test.unit_tests.image_stitching_test",
+    "test.unit_tests.image_stitching_test"
 ]
+
+
+@pytest.fixture(autouse=True)
+def mute_logger():
+    """Silence the project logger around every test and restore its previous level afterwards."""
+    project_logger = get_logger()
+    previous_level = project_logger.level
+    # One step above CRITICAL, so that none of the standard levels passes the logger's threshold.
+    project_logger.setLevel(logging.CRITICAL + 1)
+    yield
+    project_logger.setLevel(previous_level)
--- a/test/data.dvc
+++ b/test/data.dvc
@ -1,5 +1,5 @@
 outs:
- md5: 08bf8a63f04b3f19f859008556699708.dir
-  size: 7979836
-  nfiles: 7
+- md5: 4b0fec291ce0661b3efbbd8b80f4f514.dir
+  size: 107332
+  nfiles: 4
  path: data
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .10
 .8.13