Compare commits


2 Commits

Author            SHA1        Message        Date
Matthias Bisping  7ec3d52e15  applied black  2022-03-16 13:21:52 +01:00
Matthias Bisping  06ea0be8aa  refactoring    2022-03-16 13:21:20 +01:00
176 changed files with 842 additions and 45575 deletions


@ -1,63 +0,0 @@
# .coveragerc to control coverage.py
[run]
branch = True
parallel = True
command_line = -m pytest
concurrency = multiprocessing
omit =
*/site-packages/*
*/distutils/*
*/test/*
*/__init__.py
*/setup.py
*/venv/*
*/env/*
*/build_venv/*
*/build_env/*
*/utils/banner.py
*/utils/logger.py
*/src/*
source =
image_prediction
relative_files = True
data_file = .coverage
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
# Don't complain about missing debug-only code:
def __repr__
if self\.debug
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:
omit =
*/site-packages/*
*/distutils/*
*/test/*
*/__init__.py
*/setup.py
*/venv/*
*/env/*
*/build_venv/*
*/build_env/*
*/utils/banner.py
*/utils/logger.py
*/src/*
*/pdf_annotation.py
ignore_errors = True
[html]
directory = reports
[xml]
output = reports/coverage.xml


@ -1,8 +1,5 @@
[core]
remote = azure_remote
autostage = true
remote = vector
['remote "vector"']
url = ssh://vector.iqser.com/research/image-prediction/
url = ssh://vector.iqser.com/research/image_service/
port = 22
['remote "azure_remote"']
url = azure://image-classification-dvc/

11
.gitignore vendored

@ -1,8 +1,7 @@
.vscode/
*.h5
*venv
/venv/
.idea/
src/data
!.gitignore
*.project
@ -33,9 +32,6 @@ src/data
**/classpath-data.json
**/dependencies-and-licenses-overview.txt
.coverage
.coverage\.*\.*
*__pycache__
*.egg-info*
@ -48,6 +44,7 @@ src/data
*misc
/coverage_html_report/
.coverage
# Created by https://www.toptal.com/developers/gitignore/api/linux,pycharm
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,pycharm
@ -173,4 +170,6 @@ fabric.properties
# https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml
# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm
# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm
/image_prediction/data/mlruns/
/data/mlruns/


@ -1,51 +0,0 @@
include:
- project: "Gitlab/gitlab"
ref: main
file: "/ci-templates/research/dvc.gitlab-ci.yml"
- project: "Gitlab/gitlab"
ref: main
file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
variables:
NEXUS_PROJECT_DIR: red
IMAGENAME: "${CI_PROJECT_NAME}"
INTEGRATION_TEST_FILE: "${CI_PROJECT_ID}.pdf"
FF_USE_FASTZIP: "true" # enable fastzip - a faster zip implementation that also supports level configuration.
ARTIFACT_COMPRESSION_LEVEL: default # can also be set to fastest, fast, slow and slowest. If just enabling fastzip is not enough try setting this to fastest or fast.
CACHE_COMPRESSION_LEVEL: default # same as above, but for caches
# TRANSFER_METER_FREQUENCY: 5s # will display transfer progress every 5 seconds for artifacts and remote caches. For debugging purposes.
stages:
- data
- setup
- tests
- sonarqube
- versioning
- build
- integration-tests
- release
docker-build:
extends: .docker-build
needs:
- job: dvc-pull
artifacts: true
- !reference [.needs-versioning, needs] # leave this line as is
###################
# INTEGRATION TESTS
trigger-integration-tests:
extends: .integration-tests
# ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
# needs:
# - job: docker-build::model_name
# artifacts: true
rules:
- when: never
#########
# RELEASE
release:
extends: .release
needs:
- !reference [.needs-versioning, needs] # leave this line as is

3
.gitmodules vendored Normal file

@ -0,0 +1,3 @@
[submodule "incl/redai_image"]
path = incl/redai_image
url = ssh://git@git.iqser.com:2222/rr/redai_image.git


@ -1 +0,0 @@
3.10


@ -1,73 +1,25 @@
FROM python:3.10-slim AS builder
ARG BASE_ROOT="nexus.iqser.com:5001/red/"
ARG VERSION_TAG="latest"
ARG GITLAB_USER
ARG GITLAB_ACCESS_TOKEN
FROM ${BASE_ROOT}image-prediction-base:${VERSION_TAG}
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
WORKDIR /app/service
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
ARG POETRY_SOURCE_REF_RED=gitlab-red
COPY src src
COPY data data
COPY image_prediction image_prediction
COPY incl/redai_image/redai incl/redai_image/redai
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY config.yaml config.yaml
ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
# Install dependencies differing from base image.
RUN python3 -m pip install -r requirements.txt
ARG VERSION=dev
LABEL maintainer="Research <research@knecon.com>"
LABEL version="${VERSION}"
WORKDIR /app
###########
# ENV SETUP
ENV PYTHONDONTWRITEBYTECODE=true
ENV PYTHONUNBUFFERED=true
ENV POETRY_HOME=/opt/poetry
ENV PATH="$POETRY_HOME/bin:$PATH"
RUN apt-get update && \
apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN curl -sSL https://install.python-poetry.org | python3 -
RUN poetry --version
COPY pyproject.toml poetry.lock ./
RUN poetry config virtualenvs.create true && \
poetry config virtualenvs.in-project true && \
poetry config installer.max-workers 10 && \
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry install --without=dev -vv --no-interaction --no-root
###############
# WORKING IMAGE
FROM python:3.10-slim
WORKDIR /app
# COPY SOURCE CODE FROM BUILDER IMAGE
COPY --from=builder /app /app
# COPY BILL OF MATERIALS (BOM)
COPY bom.json /bom.json
ENV PATH="/app/.venv/bin:$PATH"
###################
# COPY SOURCE CODE
COPY ./src ./src
COPY ./config ./config
COPY ./data ./data
COPY banner.txt ./
RUN python3 -m pip install -e .
RUN python3 -m pip install -e incl/redai_image/redai
EXPOSE 5000
EXPOSE 8080
CMD [ "python", "src/serve.py"]
CMD ["python3", "src/serve.py"]

25
Dockerfile_base Normal file

@ -0,0 +1,25 @@
FROM python:3.8 as builder1
# Use a virtual environment.
RUN python -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
# Upgrade pip.
RUN python -m pip install --upgrade pip
# Make a directory for the service files and copy the service repo into the container.
WORKDIR /app/service
COPY ./requirements.txt ./requirements.txt
# Install dependencies.
RUN python3 -m pip install -r requirements.txt
# Make a new container and copy all relevant files over to filter out temporary files
# produced during setup to reduce the final container's size.
FROM python:3.8
WORKDIR /app/
COPY --from=builder1 /app .
ENV PATH="/app/venv/bin:$PATH"
WORKDIR /app/service


@ -1,43 +0,0 @@
FROM python:3.10
ARG USERNAME
ARG TOKEN
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
ARG POETRY_SOURCE_REF_RED=gitlab-red
ARG VERSION=dev
LABEL maintainer="Research <research@knecon.com>"
LABEL version="${VERSION}"
WORKDIR /app
ENV PYTHONUNBUFFERED=true
ENV POETRY_HOME=/opt/poetry
ENV PATH="$POETRY_HOME/bin:$PATH"
RUN curl -sSL https://install.python-poetry.org | python3 -
COPY ./data ./data
COPY ./test ./test
COPY ./config ./config
COPY ./src ./src
COPY pyproject.toml poetry.lock banner.txt config.yaml ./
RUN poetry config virtualenvs.create false && \
poetry config installer.max-workers 10 && \
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${USERNAME} ${TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${USERNAME} ${TOKEN} && \
poetry install --without=dev -vv --no-interaction --no-root
EXPOSE 5000
EXPOSE 8080
RUN apt update --yes
RUN apt install vim --yes
RUN apt install poppler-utils --yes
CMD coverage run -m pytest test/ --tb=native -q -s -vvv -x && coverage combine && coverage report -m && coverage xml

136
README.md

@ -1,143 +1,25 @@
### Setup
### Building
Build base image
```bash
docker build -t image-classification-image --progress=plain --no-cache \
-f Dockerfile \
--build-arg USERNAME=$GITLAB_USER \
--build-arg TOKEN=$GITLAB_ACCESS_TOKEN \
.
setup/docker.sh
```
Build head image
```bash
docker build -f Dockerfile -t image-prediction . --build-arg BASE_ROOT=""
```
### Usage
#### Without Docker
```bash
py scripts/run_pipeline.py /path/to/a/pdf
```
#### With Docker
Shell 1
```bash
docker run --rm --net=host image-prediction
docker run --rm --net=host --rm image-prediction
```
Shell 2
```bash
python scripts/pyinfra_mock.py /path/to/a/pdf
python scripts/pyinfra_mock.py --pdf_path /path/to/a/pdf
```
### Tests
For example, run this command to execute all tests and get a coverage report:
```bash
coverage run -m pytest test --tb=native -q -s -vvv -x && coverage combine && coverage report -m
```
After having built the service container as specified above, you can also run tests in a container as follows:
```bash
./run_tests.sh
```
### Message Body Formats
#### Request Format
The request messages need to provide the fields `"dossierId"` and `"fileId"`. A request should look like this:
```json
{
"dossierId": "<string identifier>",
"fileId": "<string identifier>"
}
```
Any additional keys are ignored.
#### Response Format
Response bodies contain information about the identified class of the image, the confidence of the classification, the
position and size of the image as well as the results of additional convenience filters which can be configured through
environment variables. A response body looks like this:
```json
{
"dossierId": "debug",
"fileId": "13ffa9851740c8d20c4c7d1706d72f2a",
"data": [...]
}
```
An image metadata record (entry in `"data"` field of a response body) looks like this:
```json
{
"classification": {
"label": "logo",
"probabilities": {
"logo": 1.0,
"signature": 1.1599173226749333e-17,
"other": 2.994595513398207e-23,
"formula": 4.352109377281029e-31
}
},
"position": {
"x1": 475.95,
"x2": 533.4,
"y1": 796.47,
"y2": 827.62,
"pageNumber": 6
},
"geometry": {
"width": 57.44999999999999,
"height": 31.149999999999977
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 0.05975350599135938,
"tooLarge": false,
"tooSmall": false
},
"imageFormat": {
"quotient": 1.8443017656500813,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": true
}
}
```
## Configuration
A configuration file is located under `config.yaml`. All relevant variables can be configured by exporting environment variables.
| __Environment Variable__ | Default | Description |
|------------------------------------|------------------------------------|----------------------------------------------------------------------------------------|
| __LOGGING_LEVEL_ROOT__ | "INFO" | Logging level for log file messages |
| __VERBOSE__ | *true* | Service prints document processing progress to stdout |
| __BATCH_SIZE__ | 16 | Number of images in memory simultaneously per service instance |
| __RUN_ID__ | "fabfb1f192c745369b88cab34471aba7" | The ID of the mlflow run to load the image classifier from |
| __MIN_REL_IMAGE_SIZE__ | 0.05 | Minimally permissible image size to page size ratio |
| __MAX_REL_IMAGE_SIZE__ | 0.75 | Maximally permissible image size to page size ratio |
| __MIN_IMAGE_FORMAT__ | 0.1 | Minimally permissible image width to height ratio |
| __MAX_IMAGE_FORMAT__ | 10 | Maximally permissible image width to height ratio |
See also: https://git.iqser.com/projects/RED/repos/helm/browse/redaction/templates/image-service-v2

40
bamboo-specs/pom.xml Normal file

@ -0,0 +1,40 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-parent</artifactId>
<version>7.1.2</version>
<relativePath/>
</parent>
<artifactId>bamboo-specs</artifactId>
<version>1.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<sonar.skip>true</sonar.skip>
</properties>
<dependencies>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-api</artifactId>
</dependency>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs</artifactId>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<!-- run 'mvn test' to perform offline validation of the plan -->
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
</project>


@ -0,0 +1,182 @@
package buildjob;
import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
import com.atlassian.bamboo.specs.api.builders.plan.Job;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
import com.atlassian.bamboo.specs.api.builders.project.Project;
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
import com.atlassian.bamboo.specs.builders.task.CleanWorkingDirectoryTask;
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
import com.atlassian.bamboo.specs.api.builders.Variable;
import com.atlassian.bamboo.specs.util.BambooServer;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
/**
* Plan configuration for Bamboo.
* Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
*/
@BambooSpec
public class PlanSpec {
private static final String SERVICE_NAME = "image-prediction";
private static final String SERVICE_NAME_BASE = "image-prediction-base";
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
/**
* Run main to publish plan on Bamboo
*/
public static void main(final String[] args) throws Exception {
//By default credentials are read from the '.credentials' file.
BambooServer bambooServer = new BambooServer("http://localhost:8085");
Plan plan = new PlanSpec().createDockerBuildPlan();
bambooServer.publish(plan);
PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
bambooServer.publish(planPermission);
}
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
Permissions permission = new Permissions()
.userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("research", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("QA", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.loggedInUserPermissions(PermissionType.VIEW)
.anonymousUserPermissionView();
return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
}
private Project project() {
return new Project()
.name("RED")
.key(new BambooKey("RED"));
}
public Plan createDockerBuildPlan() {
return new Plan(
project(),
SERVICE_NAME, new BambooKey(SERVICE_KEY))
.description("Docker build for image-prediction.")
// .variables()
.stages(new Stage("Build Stage")
.jobs(
new Job("Build Job", new BambooKey("BUILD"))
.tasks(
new CleanWorkingDirectoryTask()
.description("Clean working directory.")
.enabled(true),
new VcsCheckoutTask()
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new VcsCheckoutTask()
.description("Checkout redai_image research repository.")
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
new ScriptTask()
.description("Set config and keys.")
.inlineBody("mkdir -p ~/.ssh\n" +
"echo \"${bamboo.bamboo_agent_ssh}\" | base64 -d >> ~/.ssh/id_rsa\n" +
"echo \"host vector.iqser.com\" > ~/.ssh/config\n" +
"echo \" user bamboo-agent\" >> ~/.ssh/config\n" +
"chmod 600 ~/.ssh/config ~/.ssh/id_rsa"),
new ScriptTask()
.description("Build Docker container.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/docker-build.sh")
.argument(SERVICE_NAME + " " + SERVICE_NAME_BASE))
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/release_build:4.2.0")
.volume("/var/run/docker.sock", "/var/run/docker.sock")),
new Job("Sonar Job", new BambooKey("SONAR"))
.tasks(
new CleanWorkingDirectoryTask()
.description("Clean working directory.")
.enabled(true),
new VcsCheckoutTask()
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new VcsCheckoutTask()
.description("Checkout redai_image repository.")
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
new ScriptTask()
.description("Set config and keys.")
.inlineBody("mkdir -p ~/.ssh\n" +
"echo \"${bamboo.bamboo_agent_ssh}\" | base64 -d >> ~/.ssh/id_rsa\n" +
"echo \"host vector.iqser.com\" > ~/.ssh/config\n" +
"echo \" user bamboo-agent\" >> ~/.ssh/config\n" +
"chmod 600 ~/.ssh/config ~/.ssh/id_rsa"),
new ScriptTask()
.description("Run Sonarqube scan.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-scan.sh")
.argument(SERVICE_NAME))
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/release_build:4.2.0")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))),
new Stage("Licence Stage")
.jobs(
new Job("Git Tag Job", new BambooKey("GITTAG"))
.tasks(
new VcsCheckoutTask()
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask()
.description("Build git tag.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/git-tag.sh"),
new InjectVariablesTask()
.description("Inject git tag.")
.path("git.tag")
.namespace("g")
.scope(InjectVariablesScope.LOCAL),
new VcsTagTask()
.description("${bamboo.g.gitTag}")
.tagName("${bamboo.g.gitTag}")
.defaultRepository())
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/release_build:4.4.1")),
new Job("Licence Job", new BambooKey("LICENCE"))
.enabled(false)
.tasks(
new VcsCheckoutTask()
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask()
.description("Build licence.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/create-licence.sh"))
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RR / " + SERVICE_NAME)
.linkedRepositories("RR / redai_image")
.triggers(new BitbucketServerTrigger())
.planBranchManagement(new PlanBranchManagement()
.createForVcsBranch()
.delete(new BranchCleanup()
.whenInactiveInRepositoryAfterDays(14))
.notificationForCommitters());
}
}


@ -0,0 +1,19 @@
#!/bin/bash
set -e
if [[ \"${bamboo_version_tag}\" != \"dev\" ]]
then
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
-f ${bamboo_build_working_directory}/pom.xml \
versions:set \
-DnewVersion=${bamboo_version_tag}
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
-f ${bamboo_build_working_directory}/pom.xml \
-B clean deploy \
-e -DdeployAtEnd=true \
-Dmaven.wagon.http.ssl.insecure=true \
-Dmaven.wagon.http.ssl.allowall=true \
-Dmaven.wagon.http.ssl.ignore.validity.dates=true \
-DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
fi


@ -0,0 +1,19 @@
#!/bin/bash
set -e
SERVICE_NAME=$1
SERVICE_NAME_BASE=$2
python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
pip install dvc
pip install 'dvc[ssh]'
dvc pull
echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
docker build -f Dockerfile_base -t nexus.iqser.com:5001/red/$SERVICE_NAME_BASE:${bamboo_version_tag} .
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag} --build-arg VERSION_TAG=${bamboo_version_tag} .
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag}


@ -0,0 +1,9 @@
#!/bin/bash
set -e
if [[ "${bamboo_version_tag}" = "dev" ]]
then
echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
else
echo "gitTag=${bamboo_version_tag}" > git.tag
fi


@ -0,0 +1,51 @@
#!/bin/bash
set -e
export JAVA_HOME=/usr/bin/sonar-scanner/jre
python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
echo "dev setup for unit test and coverage 💖"
pip install -e .
pip install -r requirements.txt
SERVICE_NAME=$1
echo "dependency-check:aggregate"
mkdir -p reports
dependency-check --enableExperimental -f JSON -f HTML -f XML \
--disableAssembly -s . -o reports --project $SERVICE_NAME --exclude ".git/**" --exclude "venv/**" \
--exclude "build_venv/**" --exclude "**/__pycache__/**" --exclude "bamboo-specs/**"
if [[ -z "${bamboo_repository_pr_key}" ]]
then
echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
/usr/bin/sonar-scanner/bin/sonar-scanner \
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=image_prediction \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.branch.name=${bamboo_planRepository_1_branch} \
-Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
-Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
-Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
-Dsonar.python.coverage.reportPaths=reports/coverage.xml
else
echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
/usr/bin/sonar-scanner/bin/sonar-scanner \
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=image_prediction \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
-Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
-Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
-Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
-Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
-Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
-Dsonar.python.coverage.reportPaths=reports/coverage.xml
fi


@ -0,0 +1,16 @@
package buildjob;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
import org.junit.Test;
public class PlanSpecTest {
@Test
public void checkYourPlanOffline() throws PropertiesValidationException {
Plan plan = new PlanSpec().createDockerBuildPlan();
EntityPropertiesBuilders.build(plan);
}
}


@ -1,11 +0,0 @@
+----------------------------------------------------+
| ___ |
| __/_ `. .-"""-. |
|_._ _,-'""`-._ \_,` | \-' / )`-')|
|(,-.`._,'( |\`-/| "") `"` \ ((`"` |
| `-.-' \ )-`( , o o) ___Y , .'7 /| |
| `- \`_`"'- (_,___/...-` (_/_/ |
| |
+----------------------------------------------------+
| Image Classification Service |
+----------------------------------------------------+

33697
bom.json

File diff suppressed because it is too large

28
config.yaml Normal file

@ -0,0 +1,28 @@
webserver:
host: $SERVER_HOST|"127.0.0.1" # webserver address
port: $SERVER_PORT|5000 # webserver port
mode: $SERVER_MODE|production # webserver mode: {development, production}
service:
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger
batch_size: $BATCH_SIZE|32 # Number of images in memory simultaneously
verbose: $VERBOSE|True # Service prints document processing progress to stdout
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the model from
# These variables control filters that are applied to either images, image metadata or model predictions. The filter
# result values are reported in the service responses. For convenience the response to a request contains a
# "filters.allPassed" field, which is set to false if any of the filters returned values did not meet its specified
# required value.
filters:
image_to_page_quotient: # Image size to page size ratio (ratio of geometric means of areas)
min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
image_width_to_height_quotient: # Image width to height ratio
min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence


@ -1,68 +0,0 @@
[asyncio]
max_concurrent_tasks = 10
[dynamic_tenant_queues]
enabled = true
[metrics.prometheus]
enabled = true
prefix = "redactmanager_image_service"
[tracing]
enabled = true
# possible values "opentelemetry" | "azure_monitor" (Excpects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
type = "azure_monitor"
[tracing.opentelemetry]
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
service_name = "redactmanager_image_service"
exporter = "otlp"
[webserver]
host = "0.0.0.0"
port = 8080
[rabbitmq]
host = "localhost"
port = 5672
username = ""
password = ""
heartbeat = 60
# Has to be a divisor of heartbeat, and shouldn't be too large, since queue interactions (like receiving new messages) only happen at these intervals
# This is also the minimum time the service needs to process a message
connection_sleep = 5
input_queue = "request_queue"
output_queue = "response_queue"
dead_letter_queue = "dead_letter_queue"
tenant_event_queue_suffix = "_tenant_event_queue"
tenant_event_dlq_suffix = "_tenant_events_dlq"
tenant_exchange_name = "tenants-exchange"
queue_expiration_time = 300000 # 5 minutes in milliseconds
service_request_queue_prefix = "image_request_queue"
service_request_exchange_name = "image_request_exchange"
service_response_exchange_name = "image_response_exchange"
service_dlq_name = "image_dlq"
[storage]
backend = "s3"
[storage.s3]
bucket = "redaction"
endpoint = "http://127.0.0.1:9000"
key = ""
secret = ""
region = "eu-central-1"
[storage.azure]
container = "redaction"
connection_string = ""
[storage.tenant_server]
public_key = ""
endpoint = "http://tenant-user-management:8081/internal-api/tenants"
[kubernetes]
pod_name = "test_pod"


@ -1,42 +0,0 @@
[logging]
level = "INFO"
[service]
# Print document processing progress to stdout
verbose = false
batch_size = 6
image_stiching_tolerance = 1 # in pixels
mlflow_run_id = "fabfb1f192c745369b88cab34471aba7"
# These variables control filters that are applied to either images, image metadata or service_estimator predictions.
# The filter result values are reported in the service responses. For convenience the response to a request contains a
# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its
# specified required value.
[filters.confidence]
# Minimum permissible prediction confidence
min = 0.5
# Image size to page size ratio (ratio of geometric means of areas)
[filters.image_to_page_quotient]
min = 0.05
max = 0.75
[filters.is_scanned_page]
# Minimum permissible image to page ratio tolerance for a page to be considered scanned.
# This is only used for filtering small images on scanned pages and is applied before processing the image, therefore
# superseding the image_to_page_quotient filter that only applies a tag to the image after processing.
tolerance = 0
# Image width to height ratio
[filters.image_width_to_height_quotient]
min = 0.1
max = 10
# put class specific filters here ['signature', 'formula', 'logo']
[filters.overrides.signature.image_to_page_quotient]
max = 0.4
[filters.overrides.logo.image_to_page_quotient]
min = 0.06

1
data/.gitignore vendored

@ -1 +0,0 @@
/mlruns

4
data/base_weights.h5.dvc Normal file

@ -0,0 +1,4 @@
outs:
- md5: 6d0186c1f25e889d531788f168fa6cf0
size: 16727296
path: base_weights.h5


@ -1,5 +1,5 @@
outs:
- md5: ad061d607f615afc149643f62dbf37cc.dir
size: 166952700
nfiles: 179
- md5: d1c708270bab6fcd344d4a8b05d1103d.dir
size: 150225383
nfiles: 178
path: mlruns


@ -1 +0,0 @@
<mxfile host="app.diagrams.net" modified="2022-03-17T15:35:10.371Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36" etag="b-CbBXg6FXQ9T3Px-oLc" version="17.1.1" type="device"><diagram id="tS3WR_Pr6QhNVK3FqSUP" name="Page-1">1ZZRT6QwEMc/DY8mQHdRX93z9JLbmNzGmNxbQ0daLQzpDrL46a/IsCzinneJcd0XaP+dtsN/fkADscg3V06WeokKbBCHahOIb0Ecnydzf22FphPmyXknZM6oTooGYWWegcWQ1cooWI8CCdGSKcdiikUBKY006RzW47B7tONdS5nBRFil0k7VO6NId+rZPBz0azCZ7neOQh7JZR/MwlpLhfWOJC4DsXCI1LXyzQJs613vSzfv+57RbWIOCvqXCZqW9PBref27aZ7xsQ5vTn/cnvAqT9JW/MCwJuNzR8dZU9Nb4bAqFLSrhYG4qLUhWJUybUdrX3uvacqt70W+yeuCI9jsTTja2uDxAcyBXONDeILonWN04hn366EQUR+jd4qQsCa59tl26cEe32CH/sOt+TueoCONGRbS/kQs2YkHIGoYbFkRvuUTqAmFr1zyu2LlUvhLdjG/HtJlQO/VfOq6AyvJPI3z+HAL4wlwpbp/2V0qODxzUTJmLjo4c8nEkxaWFXcLLPzt4ithKI4BQzHBMOc/l8UvAeLrj9/hQTw9NhBnxwDibB+IB+ZvdvZ5/PnucAx6Gds5S4rLPw==</diagram></mxfile>


@ -0,0 +1,40 @@
"""Implements a config object with dot-indexing syntax."""
from envyaml import EnvYAML
from image_prediction.locations import CONFIG_FILE
def _get_item_and_maybe_make_dotindexable(container, item):
ret = container[item]
return DotIndexable(ret) if isinstance(ret, dict) else ret
class DotIndexable:
def __init__(self, x):
self.x = x
def __getattr__(self, item):
return _get_item_and_maybe_make_dotindexable(self.x, item)
def __setitem__(self, key, value):
self.x[key] = value
def __repr__(self):
return self.x.__repr__()
class Config:
def __init__(self, config_path):
self.__config = EnvYAML(config_path)
def __getattr__(self, item):
if item in self.__config:
return _get_item_and_maybe_make_dotindexable(self.__config, item)
def __getitem__(self, item):
return self.__getattr__(item)
CONFIG = Config(CONFIG_FILE)
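
A minimal usage sketch for the config object above (illustrative only; it assumes the package is installed and uses the defaults from the `config.yaml` added in this comparison, with `BATCH_SIZE` exported as an example override):

```python
import os

# Environment overrides must be set before the module is imported,
# because EnvYAML resolves the $VAR|default placeholders at load time.
os.environ["BATCH_SIZE"] = "8"

from image_prediction.config import CONFIG

print(CONFIG.service.batch_size)                   # 8 (overridden; default is 32)
print(CONFIG.filters.image_to_page_quotient.min)   # 0.05
print(CONFIG["service"].verbose)                   # item access works as well
```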

45
image_prediction/flask.py Normal file

@ -0,0 +1,45 @@
import logging
from typing import Callable
from flask import Flask, request, jsonify
from image_prediction.config import CONFIG
logger = logging.getLogger(__name__)
logger.setLevel(CONFIG.service.logging_level)
def make_prediction_server(predict_fn: Callable):
app = Flask(__name__)
@app.route("/ready", methods=["GET"])
def ready():
resp = jsonify("OK")
resp.status_code = 200
return resp
@app.route("/health", methods=["GET"])
def healthy():
resp = jsonify("OK")
resp.status_code = 200
return resp
@app.route("/", methods=["POST"])
def predict():
pdf = request.data
logger.debug("Running predictor on document...")
try:
predictions = predict_fn(pdf)
response = jsonify(predictions)
logger.info("Analysis completed.")
return response
except Exception as err:
logger.error("Analysis failed.")
logger.exception(err)
response = jsonify("Analysis failed.")
response.status_code = 500
return response
return app
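
As a sketch of how the factory above can be exercised without the real model, one can pass a stand-in `predict_fn` and use Flask's test client (the dummy prediction below is made up):

```python
from image_prediction.flask import make_prediction_server

# Stand-in predictor: ignores the PDF bytes and returns a fixed result.
app = make_prediction_server(lambda pdf_bytes: [{"label": "logo", "probabilities": {"logo": 1.0}}])

client = app.test_client()
print(client.get("/health").get_json())                    # "OK"
print(client.post("/", data=b"%PDF-1.4 ...").get_json())   # the dummy prediction list
```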


@ -0,0 +1,10 @@
from os import path
MODULE_DIR = path.dirname(path.abspath(__file__))
PACKAGE_ROOT_DIR = path.dirname(MODULE_DIR)
CONFIG_FILE = path.join(PACKAGE_ROOT_DIR, "config.yaml")
DATA_DIR = path.join(PACKAGE_ROOT_DIR, "data")
MLRUNS_DIR = path.join(DATA_DIR, "mlruns")
BASE_WEIGHTS = path.join(DATA_DIR, "base_weights.h5")


@ -0,0 +1,121 @@
import logging
from itertools import chain
from operator import itemgetter
from typing import List, Dict, Iterable
import numpy as np
from image_prediction.config import CONFIG
from image_prediction.locations import MLRUNS_DIR, BASE_WEIGHTS
from image_prediction.utils import temporary_pdf_file
from incl.redai_image.redai.redai.backend.model.model_handle import ModelHandle
from incl.redai_image.redai.redai.backend.pdf.image_extraction import extract_and_stitch
from incl.redai_image.redai.redai.utils.mlflow_reader import MlflowModelReader
from incl.redai_image.redai.redai.utils.shared import chunk_iterable
class Predictor:
"""`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
"""
def __init__(self, model_handle: ModelHandle = None):
"""Initializes a ServiceEstimator.
Args:
model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
mlflow database via CONFIG.service.run_id.
"""
try:
if model_handle is None:
reader = MlflowModelReader(run_id=CONFIG.service.run_id, mlruns_dir=MLRUNS_DIR)
self.model_handle = reader.get_model_handle(BASE_WEIGHTS)
else:
self.model_handle = model_handle
self.classes = self.model_handle.model.classes_
self.classes_readable = np.array(self.model_handle.classes)
self.classes_readable_aligned = self.classes_readable[self.classes[list(range(len(self.classes)))]]
except Exception as e:
logging.info(f"Service estimator initialization failed: {e}")
def __make_predictions_human_readable(self, probs: np.ndarray) -> List[Dict[str, float]]:
"""Translates an n x m matrix of probabilities over classes into an n-element list of mappings from classes to
probabilities.
Args:
probs: probability matrix (items x classes)
Returns:
list of human-readable class names.
"""
classes = np.argmax(probs, axis=1)
classes = self.classes[classes]
classes_readable = [self.model_handle.classes[c] for c in classes]
return classes_readable
def predict(self, images: List, probabilities: bool = False, **kwargs):
"""Gathers predictions for list of images. Assigns each image a class and optionally a probability distribution
over all classes.
Args:
images (List[PIL.Image]) : Images to gather predictions for.
probabilities: Whether to return dictionaries of the following form instead of strings:
{
"class": predicted class,
"probabilities": {
"class 1" : class 1 probability,
"class 2" : class 2 probability,
...
}
}
Returns:
By default the return value is a list of classes (meaningful class name strings). Alternatively a list of
dictionaries with an additional probability field for estimated class probabilities per image can be
returned.
"""
X = self.model_handle.prep_images(list(images))
probs_per_item = self.model_handle.model.predict_proba(X, **kwargs).astype(float)
classes = self.__make_predictions_human_readable(probs_per_item)
class2prob_per_item = [dict(zip(self.classes_readable_aligned, probs)) for probs in probs_per_item]
class2prob_per_item = [
dict(sorted(c2p.items(), key=itemgetter(1), reverse=True)) for c2p in class2prob_per_item
]
predictions = [{"class": c, "probabilities": c2p} for c, c2p in zip(classes, class2prob_per_item)]
return predictions if probabilities else classes
def predict_pdf(self, pdf):
with temporary_pdf_file(pdf) as pdf_path:
image_metadata_pairs = self.__extract_image_metadata_pairs(pdf_path)
return self.__predict_images(image_metadata_pairs)
def __predict_images(self, image_metadata_pairs: Iterable, batch_size: int = CONFIG.service.batch_size):
def process_chunk(chunk):
images, metadata = zip(*chunk)
predictions = self.predict(images, probabilities=True)
return predictions, metadata
def predict(image_metadata_pair_generator):
chunks = chunk_iterable(image_metadata_pair_generator, n=batch_size)
return map(chain.from_iterable, zip(*map(process_chunk, chunks)))
try:
predictions, metadata = predict(image_metadata_pairs)
return predictions, metadata
except ValueError:
return [], []
@staticmethod
def __extract_image_metadata_pairs(pdf_path: str, **kwargs):
def image_is_large_enough(metadata: dict):
x1, x2, y1, y2 = itemgetter("x1", "x2", "y1", "y2")(metadata)
return abs(x1 - x2) > 2 and abs(y1 - y2) > 2
yield from extract_and_stitch(pdf_path, convert_to_rgb=True, filter_fn=image_is_large_enough, **kwargs)
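
A rough usage sketch for the `Predictor` above (the module path and file path are hypothetical, and it assumes the DVC-tracked `data/mlruns` artifacts and the `redai_image` submodule are available locally):

```python
from image_prediction.model import Predictor  # hypothetical module path

predictor = Predictor()  # loads the model handle for CONFIG.service.run_id from data/mlruns
with open("/path/to/a.pdf", "rb") as f:       # hypothetical PDF path
    predictions, metadata = predictor.predict_pdf(f.read())

for prediction, meta in zip(predictions, metadata):
    print(prediction["class"], meta["page_idx"])
```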


@ -0,0 +1,70 @@
"""Defines functions for constructing service responses."""
import math
from itertools import starmap
from operator import itemgetter
from image_prediction.config import CONFIG
def build_response(predictions: list, metadata: list) -> list:
return list(starmap(build_image_info, zip(predictions, metadata)))
def build_image_info(prediction: dict, metadata: dict) -> dict:
def compute_geometric_quotient():
page_area_sqrt = math.sqrt(abs(page_width * page_height))
image_area_sqrt = math.sqrt(abs(x2 - x1) * abs(y2 - y1))
return image_area_sqrt / page_area_sqrt
page_width, page_height, x1, x2, y1, y2, width, height = itemgetter(
"page_width", "page_height", "x1", "x2", "y1", "y2", "width", "height"
)(metadata)
quotient = compute_geometric_quotient()
min_image_to_page_quotient_breached = bool(quotient < CONFIG.filters.image_to_page_quotient.min)
max_image_to_page_quotient_breached = bool(quotient > CONFIG.filters.image_to_page_quotient.max)
min_image_width_to_height_quotient_breached = bool(
width / height < CONFIG.filters.image_width_to_height_quotient.min
)
max_image_width_to_height_quotient_breached = bool(
width / height > CONFIG.filters.image_width_to_height_quotient.max
)
min_confidence_breached = bool(max(prediction["probabilities"].values()) < CONFIG.filters.min_confidence)
prediction["label"] = prediction.pop("class") # "class" as field name causes problem for Java objectmapper
prediction["probabilities"] = {klass: round(prob, 6) for klass, prob in prediction["probabilities"].items()}
image_info = {
"classification": prediction,
"position": {"x1": x1, "x2": x2, "y1": y1, "y2": y2, "pageNumber": metadata["page_idx"] + 1},
"geometry": {"width": width, "height": height},
"filters": {
"geometry": {
"imageSize": {
"quotient": quotient,
"tooLarge": max_image_to_page_quotient_breached,
"tooSmall": min_image_to_page_quotient_breached,
},
"imageFormat": {
"quotient": width / height,
"tooTall": min_image_width_to_height_quotient_breached,
"tooWide": max_image_width_to_height_quotient_breached,
},
},
"probability": {"unconfident": min_confidence_breached},
"allPassed": not any(
[
max_image_to_page_quotient_breached,
min_image_to_page_quotient_breached,
min_image_width_to_height_quotient_breached,
max_image_width_to_height_quotient_breached,
min_confidence_breached,
]
),
},
}
return image_info
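
A hypothetical call to the builder above (the module path and all numbers are made up for illustration; the actual thresholds come from `config.yaml`):

```python
from image_prediction.response import build_image_info  # hypothetical module path

prediction = {
    "class": "logo",
    "probabilities": {"logo": 0.98, "signature": 0.01, "other": 0.007, "formula": 0.003},
}
metadata = {
    "page_width": 595.0, "page_height": 842.0,
    "x1": 100.0, "x2": 200.0, "y1": 700.0, "y2": 760.0,
    "width": 100.0, "height": 60.0, "page_idx": 0,
}

info = build_image_info(prediction, metadata)
# sqrt(100 * 60) / sqrt(595 * 842) is roughly 0.11, inside the default 0.05..0.75 band
print(info["filters"]["geometry"]["imageSize"]["quotient"])
print(info["filters"]["allPassed"])
```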


@ -0,0 +1,9 @@
import tempfile
from contextlib import contextmanager
@contextmanager
def temporary_pdf_file(pdf: bytes):
with tempfile.NamedTemporaryFile() as f:
f.write(pdf)
yield f.name
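
Example use of the helper above (the bytes are a placeholder):

```python
from image_prediction.utils import temporary_pdf_file

with temporary_pdf_file(b"%PDF-1.4 ...") as path:
    print(path)  # path to a temporary file created for the PDF bytes; removed when the block exits
```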

1
incl/redai_image Submodule

@ -0,0 +1 @@
Subproject commit 4c3b26d7673457aaa99e0663dad6950cd36da967

7267
poetry.lock generated

File diff suppressed because it is too large


@ -1,73 +0,0 @@
[tool.poetry]
name = "image-classification-service"
version = "2.17.0"
description = ""
authors = ["Team Research <research@knecon.com>"]
readme = "README.md"
packages = [{ include = "image_prediction", from = "src" }]
[tool.poetry.dependencies]
python = ">=3.10,<3.11"
# FIXME: This should be recent pyinfra, but the recent protobuf packages are not compatible with tensorflow 2.9.0, also
# see RED-9948.
pyinfra = { version = "3.4.2", source = "gitlab-research" }
kn-utils = { version = ">=0.4.0", source = "gitlab-research" }
dvc = "^2.34.0"
dvc-ssh = "^2.20.0"
dvc-azure = "^2.21.2"
Flask = "^2.1.1"
requests = "^2.27.1"
iteration-utilities = "^0.11.0"
waitress = "^2.1.1"
envyaml = "^1.10.211231"
dependency-check = "^0.6.0"
mlflow = "^1.24.0"
numpy = "^1.22.3"
tqdm = "^4.64.0"
pandas = "^1.4.2"
# FIXME: Our current model significantly changes the prediction behaviour when using newer tensorflow (/ protobuf)
# versions, which is introduced by pyinfra updates using newer protobuf versions; see RED-9948.
tensorflow = "2.9.0"
protobuf = "^3.20"
pytest = "^7.1.0"
funcy = "^2"
PyMuPDF = "^1.19.6"
fpdf = "^1.7.2"
coverage = "^6.3.2"
Pillow = "^9.1.0"
pdf2image = "^1.16.0"
frozendict = "^2.3.0"
fsspec = "^2022.11.0"
PyMonad = "^2.4.0"
pdfnetpython3 = "9.4.2"
loguru = "^0.7.0"
cyclonedx-bom = "^4.5.0"
[tool.poetry.group.dev.dependencies]
pytest = "^7.0.1"
pymonad = "^2.4.0"
pylint = "^2.17.4"
ipykernel = "^6.23.2"
[tool.pytest.ini_options]
testpaths = ["test"]
addopts = "--ignore=data"
filterwarnings = ["ignore:.*:DeprecationWarning"]
[[tool.poetry.source]]
name = "PyPI"
priority = "primary"
[[tool.poetry.source]]
name = "gitlab-research"
url = "https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi/simple"
priority = "explicit"
[[tool.poetry.source]]
name = "gitlab-red"
url = "https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi/simple"
priority = "explicit"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

21
requirements.txt Normal file

@ -0,0 +1,21 @@
Flask==2.0.2
requests==2.27.1
iteration-utilities==0.11.0
dvc==2.9.3
dvc[ssh]
frozendict==2.3.0
waitress==2.0.0
envyaml~=1.8.210417
dependency-check==0.6.*
envyaml~=1.8.210417
mlflow~=1.20.2
numpy~=1.19.3
PDFNetPython3~=9.1.0
tqdm~=4.62.2
pandas~=1.3.1
mlflow~=1.20.2
tensorflow~=2.5.0
PDFNetPython3~=9.1.0
Pillow~=8.3.2
PyYAML~=5.4.1
scikit_learn~=0.24.2


@ -1,46 +0,0 @@
"""Script to debug RED-9948. The predictions unexpectedly changed for some images, and we need to understand why."""
import json
import random
from pathlib import Path
import numpy as np
import tensorflow as tf
from kn_utils.logging import logger
from image_prediction.config import CONFIG
from image_prediction.pipeline import load_pipeline
def process_pdf(pipeline, pdf_path, page_range=None):
with open(pdf_path, "rb") as f:
logger.info(f"Processing {pdf_path}")
predictions = list(pipeline(f.read(), page_range=page_range))
return predictions
def ensure_seeds():
seed = 42
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)
def debug_info():
devices = tf.config.list_physical_devices()
print("Available devices:", devices)
if __name__ == "__main__":
# For in container debugging, copy the file and adjust the path.
debug_file_path = Path(__file__).parents[2] / "test" / "data" / "RED-9948" / "SYNGENTA_EFSA_sanitisation_GFL_v2"
ensure_seeds()
debug_info()
pipeline = load_pipeline(verbose=True, batch_size=CONFIG.service.batch_size)
predictions = process_pdf(pipeline, debug_file_path)
# This is the image that has the wrong prediction mentioned in RED-9948. The predictions should be inconclusive, and
# the allPassed flag should be false.
predictions = [x for x in predictions if x["representation"] == "FA30F080F0C031CE17E8CF237"]
print(json.dumps(predictions, indent=2))


@ -1,30 +0,0 @@
#!/bin/bash
python_version=$1
gitlab_user=$2
gitlab_personal_access_token=$3
# cookiecutter https://gitlab.knecon.com/knecon/research/template-python-project.git --checkout master
# latest_dir=$(ls -td -- */ | head -n 1) # should be the dir cookiecutter just created
# cd $latest_dir
pyenv install $python_version
pyenv local $python_version
pyenv shell $python_version
pip install --upgrade pip
pip install poetry
poetry config installer.max-workers 10
# research package registry
poetry config repositories.gitlab-research https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
poetry config http-basic.gitlab-research ${gitlab_user} ${gitlab_personal_access_token}
# redactmanager package registry
poetry config repositories.gitlab-red https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
poetry config http-basic.gitlab-red ${gitlab_user} ${gitlab_personal_access_token}
poetry env use $(pyenv which python)
poetry install --with=dev
poetry update
source .venv/bin/activate


@ -1,6 +0,0 @@
docker build --platform linux/amd64 -t image-clsasification-service:$(poetry version -s)-dev \
-f Dockerfile \
--build-arg GITLAB_USER=$GITLAB_USER \
--build-arg GITLAB_ACCESS_TOKEN=$GITLAB_ACCESS_TOKEN \
. && \
docker run -it --rm image-clsasification-service:$(poetry version -s)-dev


@ -1,3 +0,0 @@
docker tag image-clsasification-service:$(poetry version -s)-dev $NEXUS_REGISTRY/red/image-clsasification-service:$(poetry version -s)-dev
docker push $NEXUS_REGISTRY/red/image-clsasification-service:$(poetry version -s)-dev


@ -1,6 +0,0 @@
from pyinfra.k8s_probes import startup
from loguru import logger
if __name__ == "__main__":
logger.debug("running health check")
startup.run_checks()


@ -1,58 +0,0 @@
import multiprocessing
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
def process(predict_fn_wrapper):
# We observed memory doesn't get properly deallocated unless we do this:
manager = multiprocessing.Manager()
return_dict = manager.dict()
p = multiprocessing.Process(
target=predict_fn_wrapper,
args=(return_dict,),
)
p.start()
p.join()
try:
return dict(return_dict)["result"]
except KeyError:
pass
def make_model():
inputs = keras.Input(shape=(784,))
dense = layers.Dense(64, activation="relu")
x = dense(inputs)
outputs = layers.Dense(10)(x)
model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
model.compile(
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
optimizer=keras.optimizers.RMSprop(),
metrics=["accuracy"],
)
return model
def make_predict_fn():
# Keras bug: doesn't work in outer scope
model = make_model()
def predict(*args):
# service_estimator = make_model()
return model.predict(np.random.random(size=(1, 784)))
return predict
def make_predict_fn_wrapper(predict_fn):
def predict_fn_wrapper(return_dict):
return_dict["result"] = predict_fn()
return predict_fn_wrapper
if __name__ == "__main__":
predict_fn = make_predict_fn()
print(process(make_predict_fn_wrapper(predict_fn)))


@ -6,7 +6,7 @@ import requests
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("pdf_path")
parser.add_argument("--pdf_path", required=True)
args = parser.parse_args()
return args


@ -1,58 +0,0 @@
import argparse
import json
import os
from glob import glob
from image_prediction.config import CONFIG
from image_prediction.pipeline import load_pipeline
from image_prediction.utils import get_logger
from image_prediction.utils.pdf_annotation import annotate_pdf
logger = get_logger()
logger.setLevel("DEBUG")
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("input", help="pdf file or directory")
parser.add_argument("--print", "-p", help="print output to terminal", action="store_true", default=False)
parser.add_argument("--page_interval", "-i", help="page interval [i, j), min index = 0", nargs=2, type=int)
args = parser.parse_args()
return args
def process_pdf(pipeline, pdf_path, page_range=None):
with open(pdf_path, "rb") as f:
logger.info(f"Processing {pdf_path}")
predictions = list(pipeline(f.read(), page_range=page_range))
annotate_pdf(
pdf_path, predictions, os.path.join("/tmp", os.path.basename(pdf_path.replace(".pdf", "_annotated.pdf")))
)
return predictions
def main(args):
pipeline = load_pipeline(verbose=CONFIG.service.verbose, batch_size=CONFIG.service.batch_size, tolerance=CONFIG.service.image_stiching_tolerance)
if os.path.isfile(args.input):
pdf_paths = [args.input]
else:
pdf_paths = glob(os.path.join(args.input, "*.pdf"))
page_range = range(*args.page_interval) if args.page_interval else None
for pdf_path in pdf_paths:
predictions = process_pdf(pipeline, pdf_path, page_range=page_range)
if args.print:
print(pdf_path)
print(json.dumps(predictions, indent=2))
if __name__ == "__main__":
args = parse_args()
main(args)


@ -1,15 +0,0 @@
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
pip install dvc
pip install 'dvc[ssh]'
echo "Pulling dvc data"
dvc pull
docker build -f Dockerfile_tests -t image-prediction-tests .
rnd=$(date +"%s")
name=image-prediction-tests-${rnd}
echo "running tests container"
docker run --rm --name $name -v $PWD:$PWD -w $PWD -v /var/run/docker.sock:/var/run/docker.sock image-prediction-tests

13
setup.py Normal file

@ -0,0 +1,13 @@
#!/usr/bin/env python
from distutils.core import setup
setup(
name="image_prediction",
version="0.1.0",
description="",
author="",
author_email="",
url="",
packages=["image_prediction"],
)

15
setup/docker.sh Executable file

@ -0,0 +1,15 @@
#!/bin/bash
set -e
python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
pip install dvc
pip install 'dvc[ssh]'
dvc pull
git submodule update --init --recursive
docker build -f Dockerfile_base -t image-prediction-base .
docker build -f Dockerfile -t image-prediction .

4
sonar-project.properties Normal file

@ -0,0 +1,4 @@
sonar.exclusions=bamboo-specs/**, **/test_data/**
sonar.c.file.suffixes=-
sonar.cpp.file.suffixes=-
sonar.objc.file.suffixes=-


@ -1,13 +0,0 @@
import logging
import sys
# log config
LOG_FORMAT = "%(asctime)s [%(levelname)s] - [%(filename)s -> %(funcName)s() -> %(lineno)s] : %(message)s"
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler_format = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
stream_handler.setFormatter(stream_handler_format)
logger = logging.getLogger(__name__)
logger.propagate = False
logger.addHandler(stream_handler)


@ -1,35 +0,0 @@
from typing import List, Union, Tuple
import numpy as np
from PIL.Image import Image
from funcy import rcompose
from image_prediction.estimator.adapter.adapter import EstimatorAdapter
from image_prediction.label_mapper.mapper import LabelMapper
from image_prediction.utils import get_logger
logger = get_logger()
class Classifier:
def __init__(self, estimator_adapter: EstimatorAdapter, label_mapper: LabelMapper):
"""Abstraction layer over different estimator backends (e.g. keras or scikit-learn). For each backend to be used
an EstimatorAdapter must be implemented.
Args:
estimator_adapter: adapter for a given estimator backend
"""
self.__estimator_adapter = estimator_adapter
self.__label_mapper = label_mapper
self.__pipe = rcompose(self.__estimator_adapter, self.__label_mapper)
def predict(self, batch: Union[np.array, Tuple[Image]]) -> List[str]:
if isinstance(batch, np.ndarray) and batch.shape[0] == 0:
return []
return self.__pipe(batch)
def __call__(self, batch: np.array) -> List[str]:
logger.debug("Classifier.predict")
return self.predict(batch)


@ -1,32 +0,0 @@
from itertools import chain
from typing import Iterable
from PIL.Image import Image
from funcy import rcompose, chunks
from image_prediction.classifier.classifier import Classifier
from image_prediction.estimator.preprocessor.preprocessor import Preprocessor
from image_prediction.estimator.preprocessor.preprocessors.identity import IdentityPreprocessor
from image_prediction.utils import get_logger
logger = get_logger()
class ImageClassifier:
"""Combines a classifier with a preprocessing pipeline: Receives images, chunks into batches, converts to tensors,
applies transformations and finally sends to internal classifier.
"""
def __init__(self, classifier: Classifier, preprocessor: Preprocessor = None):
self.estimator = classifier
self.preprocessor = preprocessor if preprocessor else IdentityPreprocessor()
self.pipe = rcompose(self.preprocessor, self.estimator)
def predict(self, images: Iterable[Image], batch_size=16):
batches = chunks(batch_size, images)
predictions = chain.from_iterable(map(self.pipe, batches))
return predictions
def __call__(self, images: Iterable[Image], batch_size=16):
logger.debug("ImageClassifier.predict")
yield from self.predict(images, batch_size=batch_size)


@ -1,16 +0,0 @@
from funcy import rcompose
from image_prediction.transformer.transformer import Transformer
from image_prediction.utils import get_logger
logger = get_logger()
class TransformerCompositor(Transformer):
def __init__(self, formatter: Transformer, *formatters: Transformer):
formatters = (formatter, *formatters)
self.pipe = rcompose(*formatters)
def transform(self, obj):
logger.debug("TransformerCompositor.transform")
return self.pipe(obj)


@ -1,7 +0,0 @@
from pathlib import Path
from pyinfra.config.loader import load_settings
from image_prediction.locations import PROJECT_ROOT_DIR
CONFIG = load_settings(root_path=PROJECT_ROOT_DIR, settings_path="config")


@ -1,43 +0,0 @@
from funcy import juxt
from image_prediction.classifier.classifier import Classifier
from image_prediction.classifier.image_classifier import ImageClassifier
from image_prediction.compositor.compositor import TransformerCompositor
from image_prediction.encoder.encoders.hash_encoder import HashEncoder
from image_prediction.estimator.adapter.adapter import EstimatorAdapter
from image_prediction.formatter.formatters.camel_case import Snake2CamelCaseKeyFormatter
from image_prediction.formatter.formatters.enum import EnumFormatter
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
from image_prediction.label_mapper.mappers.probability import ProbabilityMapper
from image_prediction.model_loader.loader import ModelLoader
from image_prediction.model_loader.loaders.mlflow import MlflowConnector
from image_prediction.redai_adapter.mlflow import MlflowModelReader
from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCoordinateTransformer
from image_prediction.transformer.transformers.response import ResponseTransformer
def get_mlflow_model_loader(mlruns_dir):
model_loader = ModelLoader(MlflowConnector(MlflowModelReader(mlruns_dir)))
return model_loader
def get_image_classifier(model_loader, model_identifier):
model, classes = juxt(model_loader.load_model, model_loader.load_classes)(model_identifier)
return ImageClassifier(Classifier(EstimatorAdapter(model), ProbabilityMapper(classes)))
def get_extractor(**kwargs):
image_extractor = ParsablePDFImageExtractor(**kwargs)
return image_extractor
def get_formatter():
formatter = TransformerCompositor(
PDFNetCoordinateTransformer(), EnumFormatter(), ResponseTransformer(), Snake2CamelCaseKeyFormatter()
)
return formatter
def get_encoder():
return HashEncoder()


@ -1,13 +0,0 @@
import abc
from typing import Iterable
from PIL.Image import Image
class Encoder(abc.ABC):
@abc.abstractmethod
def encode(self, images: Iterable[Image]):
raise NotImplementedError
def __call__(self, images: Iterable[Image], batch_size=16):
yield from self.encode(images)


@ -1,26 +0,0 @@
from typing import Iterable
from PIL import Image
from image_prediction.encoder.encoder import Encoder
class HashEncoder(Encoder):
def encode(self, images: Iterable[Image.Image]):
yield from map(hash_image, images)
def __call__(self, images: Iterable[Image.Image], batch_size=16):
yield from self.encode(images)
def hash_image(image: Image.Image) -> str:
"""See: https://stackoverflow.com/a/49692185/3578468"""
image = image.resize((10, 10), Image.LANCZOS)
image = image.convert("L")
pixel_data = list(image.getdata())
avg_pixel = sum(pixel_data) / len(pixel_data)
bits = "".join(["1" if (px >= avg_pixel) else "0" for px in pixel_data])
hex_representation = str(hex(int(bits, 2)))[2:][::-1].upper()
# Note: for every four leading zero bits, the hex representation is one character shorter.
# To ensure that all hashes have the same length, the hex representation is zero-padded to 25 characters (see RED-3813).
return hex_representation.zfill(25)
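
A small sketch of the padding behaviour described in the note above; the images are synthetic and purely illustrative:

# Illustrative only: regardless of how many leading zero bits the hash has,
# zfill keeps every hash at 25 hex characters (100 bits / 4 bits per hex digit).
import numpy as np
from PIL import Image

white = Image.fromarray(np.full((32, 32), 255, dtype=np.uint8))
noise = Image.fromarray(np.random.randint(0, 256, (32, 32), dtype=np.uint8))
assert len(hash_image(white)) == len(hash_image(noise)) == 25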

View File

@ -1,15 +0,0 @@
from image_prediction.utils import get_logger
logger = get_logger()
class EstimatorAdapter:
def __init__(self, estimator):
self.estimator = estimator
def predict(self, batch):
return self.estimator(batch)
def __call__(self, batch):
logger.debug("EstimatorAdapter.predict")
return self.predict(batch)

View File

@ -1,10 +0,0 @@
import abc
class Preprocessor(abc.ABC):
@abc.abstractmethod
def preprocess(self, batch):
raise NotImplementedError
def __call__(self, batch):
return self.preprocess(batch)

View File

@ -1,10 +0,0 @@
from image_prediction.estimator.preprocessor.preprocessor import Preprocessor
from image_prediction.estimator.preprocessor.utils import images_to_batch_tensor
class BasicPreprocessor(Preprocessor):
"""Converts images to tensors"""
@staticmethod
def preprocess(images):
return images_to_batch_tensor(images)

View File

@ -1,10 +0,0 @@
from image_prediction.estimator.preprocessor.preprocessor import Preprocessor
class IdentityPreprocessor(Preprocessor):
@staticmethod
def preprocess(images):
return images
def __call__(self, images):
return self.preprocess(images)

View File

@ -1,10 +0,0 @@
import numpy as np
from PIL.Image import Image
def image_to_normalized_tensor(image: Image) -> np.ndarray:
return np.array(image) / 255
def images_to_batch_tensor(images) -> np.ndarray:
return np.array(list(map(image_to_normalized_tensor, images)))
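
A quick shape sketch, assuming three equally sized RGB images (the sizes are illustrative):

# Illustrative shapes only: three 64x64 RGB images become one (3, 64, 64, 3) batch scaled to [0, 1].
from PIL import Image

images = [Image.new("RGB", (64, 64), color=(40 * i, 0, 0)) for i in range(3)]
batch = images_to_batch_tensor(images)
assert batch.shape == (3, 64, 64, 3)
assert batch.min() >= 0.0 and batch.max() <= 1.0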

View File

@ -1,42 +0,0 @@
class UnknownEstimatorAdapter(ValueError):
pass
class UnknownImageExtractor(ValueError):
pass
class UnknownModelLoader(ValueError):
pass
class UnknownDatabaseType(ValueError):
pass
class UnknownLabelFormat(ValueError):
pass
class UnexpectedLabelFormat(ValueError):
pass
class IncorrectInstantiation(RuntimeError):
pass
class IntentionalTestException(RuntimeError):
pass
class InvalidBox(Exception):
pass
class ParsingError(Exception):
pass
class BadXref(ValueError):
pass

View File

@ -1,13 +0,0 @@
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
def extract_images_from_pdf(pdf, extractor=None):
if not extractor:
extractor = ParsablePDFImageExtractor()
try:
images_extracted, metadata_extracted = zip(*extractor(pdf))
return images_extracted, metadata_extracted
except ValueError:
return [], []
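
A minimal usage sketch for this helper; the file path is hypothetical:

# Hypothetical path; the helper returns two empty lists when extraction yields nothing.
with open("example.pdf", "rb") as fh:
    pdf_bytes = fh.read()

images, metadata = extract_images_from_pdf(pdf_bytes)
for image, metadatum in zip(images, metadata):
    print(image.size, metadatum)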

View File

@ -1,60 +0,0 @@
from typing import Callable
from flask import Flask, request, jsonify
from prometheus_client import generate_latest, CollectorRegistry, Summary
from image_prediction.utils import get_logger
from image_prediction.utils.process_wrapping import wrap_in_process
logger = get_logger()
def make_prediction_server(predict_fn: Callable):
app = Flask(__name__)
registry = CollectorRegistry(auto_describe=True)
metric = Summary(
f"redactmanager_imageClassification_seconds", f"Time spent on image-service classification.", registry=registry
)
@app.route("/ready", methods=["GET"])
def ready():
resp = jsonify("OK")
resp.status_code = 200
return resp
@app.route("/health", methods=["GET"])
def healthy():
resp = jsonify("OK")
resp.status_code = 200
return resp
def __failure():
response = jsonify("Analysis failed")
response.status_code = 500
return response
@app.route("/predict", methods=["POST"])
@app.route("/", methods=["POST"])
@metric.time()
def predict():
# TensorFlow does not free RAM. Workaround: run the prediction function (which instantiates a model) in a sub-process.
# See: https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
predict_fn_wrapped = wrap_in_process(predict_fn)
logger.info("Analysing...")
predictions = predict_fn_wrapped(request.data)
if predictions is not None:
response = jsonify(predictions)
logger.info("Analysis completed.")
return response
else:
logger.error("Analysis failed.")
return __failure()
@app.route("/prometheus", methods=["GET"])
def prometheus():
return generate_latest(registry=registry)
return app
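
A hedged sketch of starting the server, assuming a predict_fn that takes raw request bytes and returns something JSON-serializable; the callable, host and port are illustrative, not taken from the source:

# Illustrative wiring only; predict_pdf_bytes is a hypothetical stand-in for the real prediction function.
def predict_pdf_bytes(pdf: bytes):
    return [{"label": "photo", "probability": 0.97}]

app = make_prediction_server(predict_pdf_bytes)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8080)  # host and port are assumptions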

View File

@ -1,15 +0,0 @@
import abc
from image_prediction.transformer.transformer import Transformer
class Formatter(Transformer):
@abc.abstractmethod
def format(self, obj):
raise NotImplementedError
def transform(self, obj):
raise NotImplementedError()
def __call__(self, obj):
return self.format(obj)

View File

@ -1,11 +0,0 @@
from image_prediction.formatter.formatters.key_formatter import KeyFormatter
class Snake2CamelCaseKeyFormatter(KeyFormatter):
def format_key(self, key):
if isinstance(key, str):
head, *tail = key.split("_")
return head + "".join(map(str.title, tail))
else:
return key
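
A short example of the conversion this formatter performs; the metadata dict is illustrative:

# Illustrative input; non-string keys are passed through unchanged.
formatter = Snake2CamelCaseKeyFormatter()
assert formatter({"page_width": 595, "page_height": 842, "image_id": 3}) == {
    "pageWidth": 595,
    "pageHeight": 842,
    "imageId": 3,
}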

View File

@ -1,23 +0,0 @@
from enum import Enum
from image_prediction.formatter.formatters.key_formatter import KeyFormatter
class EnumFormatter(KeyFormatter):
def format_key(self, key):
return key.value if isinstance(key, Enum) else key
def transform(self, obj):
raise NotImplementedError
class ReverseEnumFormatter(KeyFormatter):
def __init__(self, enum):
self.enum = enum
self.reverse_enum = {e.value: e for e in enum}
def format_key(self, key):
return self.reverse_enum.get(key, key)
def transform(self, obj):
raise NotImplementedError

View File

@ -1,6 +0,0 @@
from image_prediction.formatter.formatter import Formatter
class IdentityFormatter(Formatter):
def format(self, obj):
return obj

View File

@ -1,28 +0,0 @@
import abc
from typing import Iterable
from image_prediction.formatter.formatter import Formatter
class KeyFormatter(Formatter):
@abc.abstractmethod
def format_key(self, key):
raise NotImplementedError
def __format(self, data):
# If we wanted to do this properly, we would need handlers for all expected types and dispatch based
# on a type comparison. This is too much engineering for the limited use-case of this class though.
if isinstance(data, Iterable) and not isinstance(data, dict) and not isinstance(data, str):
f = map(self.__format, data)
return type(data)(f) if not isinstance(data, map) else f
if not isinstance(data, dict):
return data
keys_formatted = list(map(self.format_key, data))
return dict(zip(keys_formatted, map(self.__format, data.values())))
def format(self, data):
return self.__format(data)

View File

@ -1,19 +0,0 @@
import abc
from collections import namedtuple
from typing import Iterable
from image_prediction.utils import get_logger
ImageMetadataPair = namedtuple("ImageMetadataPair", ["image", "metadata"])
logger = get_logger()
class ImageExtractor(abc.ABC):
@abc.abstractmethod
def extract(self, obj) -> Iterable[ImageMetadataPair]:
raise NotImplementedError
def __call__(self, obj, **kwargs):
logger.debug("ImageExtractor.extract")
return self.extract(obj, **kwargs)

View File

@ -1,7 +0,0 @@
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
class ImageExtractorMock(ImageExtractor):
def extract(self, image_container):
for i, image in enumerate(image_container):
yield ImageMetadataPair(image, {"image_id": i})

View File

@ -1,300 +0,0 @@
import atexit
import json
import traceback
from functools import partial, lru_cache
from itertools import chain, starmap, filterfalse, tee
from operator import itemgetter, truth
from typing import Iterable, Iterator, List, Union
import fitz
import numpy as np
from PIL import Image
from funcy import merge, pluck, compose, rcompose, remove, keep
from scipy.stats import gmean
from image_prediction.config import CONFIG
from image_prediction.exceptions import InvalidBox
from image_prediction.formatter.formatters.enum import EnumFormatter
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
from image_prediction.info import Info
from image_prediction.stitching.stitching import stitch_pairs
from image_prediction.stitching.utils import validate_box
from image_prediction.transformer.transformers.response import compute_geometric_quotient
from image_prediction.utils import get_logger
logger = get_logger()
class ParsablePDFImageExtractor(ImageExtractor):
def __init__(self, verbose=False, tolerance=0):
"""
Args:
verbose: Whether to show progressbar
tolerance: The tolerance in pixels for the distance between images, beyond which they will not be stitched
together
"""
self.doc: fitz.Document = None
self.verbose = verbose
self.tolerance = tolerance
def extract(self, pdf: bytes, page_range: range = None):
self.doc = fitz.Document(stream=pdf)
pages = extract_pages(self.doc, page_range) if page_range else self.doc
image_metadata_pairs = chain.from_iterable(map(self.__process_images_on_page, pages))
yield from image_metadata_pairs
def __process_images_on_page(self, page: fitz.Page):
metadata = extract_valid_metadata(self.doc, page)
images = get_images_on_page(self.doc, metadata)
clear_caches()
image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata)))
# TODO: In the future, consider introducing an image validator as a pipeline component rather than doing the
# validation here. Invalid images can then be split into a different stream and joined with the intact images
# again for the formatting step.
image_metadata_pairs = self.__filter_valid_images(image_metadata_pairs)
image_metadata_pairs = stitch_pairs(list(image_metadata_pairs), tolerance=self.tolerance)
yield from image_metadata_pairs
@staticmethod
def __filter_valid_images(image_metadata_pairs: Iterable[ImageMetadataPair]) -> Iterator[ImageMetadataPair]:
def validate_image_is_not_corrupt(image: Image.Image, metadata: dict):
"""See RED-5148: Some images are corrupt and cannot be processed by the image classifier. This function
filters out such images by trying to resize and convert them to RGB. If this fails, the image is considered
corrupt and is dropped.
TODO: find cleaner solution
"""
try:
image.resize((100, 100)).convert("RGB")
return ImageMetadataPair(image, metadata)
except Exception:
metadata = json.dumps(EnumFormatter()(metadata), indent=2)
logger.warning(f"Invalid image encountered. Image metadata:\n{metadata}\n\n{traceback.format_exc()}")
return None
def filter_small_images_on_scanned_pages(image_metadata_pairs) -> Iterable[ImageMetadataPair]:
"""See RED-9746: Small images on scanned pages should be dropped, so they are not classified. This is a
heuristic to filter out images that are too small in relation to the page size if they are on a scanned page.
The ratio is computed as the geometric mean of the width and height of the image divided by the geometric mean
of the width and height of the page. If the ratio is below the threshold, the image is dropped.
"""
def image_is_a_scanned_page(image_metadata_pair: ImageMetadataPair) -> bool:
tolerance = CONFIG.filters.is_scanned_page.tolerance
width_ratio = image_metadata_pair.metadata[Info.WIDTH] / image_metadata_pair.metadata[Info.PAGE_WIDTH]
height_ratio = (
image_metadata_pair.metadata[Info.HEIGHT] / image_metadata_pair.metadata[Info.PAGE_HEIGHT]
)
return width_ratio >= 1 - tolerance and height_ratio >= 1 - tolerance
def image_fits_geometric_mean_ratio(image_metadata_pair: ImageMetadataPair) -> bool:
min_ratio = CONFIG.filters.image_to_page_quotient.min
metadatum = image_metadata_pair.metadata
image_gmean = gmean([metadatum[Info.WIDTH], metadatum[Info.HEIGHT]])
page_gmean = gmean([metadatum[Info.PAGE_WIDTH], metadatum[Info.PAGE_HEIGHT]])
ratio = image_gmean / page_gmean
return ratio >= min_ratio
pairs, pairs_copy = tee(image_metadata_pairs)
if any(map(image_is_a_scanned_page, pairs_copy)):
logger.debug("Scanned page detected, filtering out small images ...")
return filter(image_fits_geometric_mean_ratio, pairs)
else:
return pairs
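# Worked example (illustrative numbers, not from the source): a 100x50 pt image on a
# 600x800 pt page gives gmean(100, 50) / gmean(600, 800) ~= 70.7 / 692.8 ~= 0.10, so on a
# scanned page the image is dropped whenever the configured minimum ratio exceeds roughly 0.10.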
image_metadata_pairs = filter_small_images_on_scanned_pages(image_metadata_pairs)
return filter(truth, starmap(validate_image_is_not_corrupt, image_metadata_pairs))
def extract_pages(doc, page_range):
page_range = range(page_range.start + 1, page_range.stop + 1)
pages = map(doc.load_page, page_range)
yield from pages
def get_images_on_page(doc, metadata):
xrefs = pluck(Info.XREF, metadata)
images = map(partial(xref_to_image, doc), xrefs)
yield from images
def extract_valid_metadata(doc: fitz.Document, page: fitz.Page):
metadata = get_metadata_for_images_on_page(page)
metadata = filter_valid_metadata(metadata)
metadata = add_alpha_channel_info(doc, metadata)
return list(metadata)
def get_metadata_for_images_on_page(page: fitz.Page):
metadata = map(get_image_metadata, get_image_infos(page))
metadata = add_page_metadata(page, metadata)
yield from metadata
def filter_valid_metadata(metadata):
yield from compose(
# TODO: Disabled for now, since the backend currently needs the metadata and the hash of every image, even for
# scanned pages. In the future, this should be resolved differently, e.g. by filtering out all page-sized images
# and giving the user the ability to reclassify false positives with a separate call.
# filter_out_page_sized_images,
filter_out_tiny_images,
filter_out_invalid_metadata,
)(metadata)
def filter_out_invalid_metadata(metadata):
def __validate_box(box):
try:
return validate_box(box)
except InvalidBox as err:
logger.debug(f"Dropping invalid metadatum, reason: {err}")
yield from keep(__validate_box, metadata)
def filter_out_page_sized_images(metadata):
yield from remove(breaches_image_to_page_quotient, metadata)
def filter_out_tiny_images(metadata):
yield from filterfalse(tiny, metadata)
@lru_cache(maxsize=None)
def get_image_infos(page: fitz.Page) -> List[dict]:
return page.get_image_info(xrefs=True)
@lru_cache(maxsize=None)
def xref_to_image(doc, xref) -> Union[Image.Image, None]:
# NOTE: image extraction is done via pixmap to array, as this method is twice as fast as extraction via bytestream
try:
pixmap = fitz.Pixmap(doc, xref)
array = convert_pixmap_to_array(pixmap)
return Image.fromarray(array)
except ValueError:
logger.debug(f"Xref {xref} is invalid, skipping extraction ...")
return
def convert_pixmap_to_array(pixmap: fitz.Pixmap):
array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w, pixmap.n)
array = _normalize_channels(array)
return array
def _normalize_channels(array: np.ndarray):
if array.shape[-1] == 1:
array = array[:, :, 0]
elif array.shape[-1] == 4:
array = array[..., :3]
elif array.shape[-1] != 3:
logger.warning(f"Unexpected image format: {array.shape}.")
raise ValueError(f"Unexpected image format: {array.shape}.")
return array
def get_image_metadata(image_info):
xref, coords = itemgetter("xref", "bbox")(image_info)
x1, y1, x2, y2 = map(rounder, coords)
width = abs(x2 - x1)
height = abs(y2 - y1)
return {
Info.WIDTH: width,
Info.HEIGHT: height,
Info.X1: x1,
Info.X2: x2,
Info.Y1: y1,
Info.Y2: y2,
Info.XREF: xref,
}
def add_page_metadata(page, metadata):
yield from map(partial(merge, get_page_metadata(page)), metadata)
def add_alpha_channel_info(doc, metadata):
def add_alpha_value_to_metadatum(metadatum):
alpha = metadatum_to_alpha_value(metadatum)
return {**metadatum, Info.ALPHA: alpha}
xref_to_alpha = partial(has_alpha_channel, doc)
metadatum_to_alpha_value = compose(xref_to_alpha, itemgetter(Info.XREF))
yield from map(add_alpha_value_to_metadatum, metadata)
@lru_cache(maxsize=None)
def load_image_handle_from_xref(doc, xref):
try:
return doc.extract_image(xref)
except ValueError:
logger.debug(f"Xref {xref} is invalid, skipping extraction ...")
return
rounder = rcompose(round, int)
def get_page_metadata(page):
page_width, page_height = map(rounder, page.mediabox_size)
return {
Info.PAGE_WIDTH: page_width,
Info.PAGE_HEIGHT: page_height,
Info.PAGE_IDX: page.number,
}
def has_alpha_channel(doc, xref):
maybe_image = load_image_handle_from_xref(doc, xref)
maybe_smask = maybe_image["smask"] if maybe_image else None
if maybe_smask:
return any([doc.extract_image(maybe_smask) is not None, bool(fitz.Pixmap(doc, maybe_smask).alpha)])
else:
try:
return bool(fitz.Pixmap(doc, xref).alpha)
except ValueError:
logger.debug(f"Encountered invalid xref `{xref}` in {doc.metadata.get('title', '<no title>')}.")
return False
def tiny(metadata):
return metadata[Info.WIDTH] * metadata[Info.HEIGHT] <= 4
def clear_caches():
get_image_infos.cache_clear()
load_image_handle_from_xref.cache_clear()
xref_to_image.cache_clear()
atexit.register(clear_caches)
def breaches_image_to_page_quotient(metadatum):
page_width, page_height, x1, x2, y1, y2, width, height = itemgetter(
Info.PAGE_WIDTH, Info.PAGE_HEIGHT, Info.X1, Info.X2, Info.Y1, Info.Y2, Info.WIDTH, Info.HEIGHT
)(metadatum)
geometric_quotient = compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1)
quotient_breached = bool(geometric_quotient > CONFIG.filters.image_to_page_quotient.max)
return quotient_breached

View File

@ -1,15 +0,0 @@
from enum import Enum
class Info(Enum):
PAGE_WIDTH = "page_width"
PAGE_HEIGHT = "page_height"
PAGE_IDX = "page_idx"
WIDTH = "width"
HEIGHT = "height"
X1 = "x1"
X2 = "x2"
Y1 = "y1"
Y2 = "y2"
ALPHA = "alpha"
XREF = "xref"

View File

@ -1,10 +0,0 @@
import abc
class LabelMapper(abc.ABC):
@abc.abstractmethod
def map_labels(self, items):
raise NotImplementedError
def __call__(self, items):
return self.map_labels(items)

View File

@ -1,20 +0,0 @@
from typing import Mapping, Iterable
from image_prediction.exceptions import UnexpectedLabelFormat
from image_prediction.label_mapper.mapper import LabelMapper
class IndexMapper(LabelMapper):
def __init__(self, labels: Mapping[int, str]):
self.__labels = labels
def __validate_index_label_format(self, index_label: int) -> None:
if not 0 <= index_label < len(self.__labels):
raise UnexpectedLabelFormat(f"Received index label '{index_label}' that has no associated string label.")
def __map_label(self, index_label: int) -> str:
self.__validate_index_label_format(index_label)
return self.__labels[index_label]
def map_labels(self, index_labels: Iterable[int]) -> Iterable[str]:
return map(self.__map_label, index_labels)

View File

@ -1,39 +0,0 @@
from enum import Enum
from operator import itemgetter
from typing import Mapping, Iterable
import numpy as np
from funcy import rcompose, rpartial
from image_prediction.exceptions import UnexpectedLabelFormat
from image_prediction.label_mapper.mapper import LabelMapper
class ProbabilityMapperKeys(Enum):
LABEL = "label"
PROBABILITIES = "probabilities"
class ProbabilityMapper(LabelMapper):
def __init__(self, labels: Mapping[int, str]):
self.__labels = labels
# String conversion in the middle due to floating point precision issues.
# See: https://stackoverflow.com/questions/56820/round-doesnt-seem-to-be-rounding-properly
self.__rounder = rcompose(rpartial(round, 4), str, float)
def __validate_array_label_format(self, probabilities: np.ndarray) -> None:
if not len(probabilities) == len(self.__labels):
raise UnexpectedLabelFormat(
f"Received fewer probabilities ({len(probabilities)}) than labels were passed ({len(self.__labels)})."
)
def __map_array(self, probabilities: np.ndarray) -> dict:
self.__validate_array_label_format(probabilities)
cls2prob = dict(
sorted(zip(self.__labels, list(map(self.__rounder, probabilities))), key=itemgetter(1), reverse=True)
)
most_likely = [*cls2prob][0]
return {ProbabilityMapperKeys.LABEL: most_likely, ProbabilityMapperKeys.PROBABILITIES: cls2prob}
def map_labels(self, probabilities: Iterable[np.ndarray]) -> Iterable[dict]:
return map(self.__map_array, probabilities)
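
A short illustrative example of the mapping; the class names and probabilities are made up, and the labels are assumed to arrive as a plain sequence of class names:

# Illustrative labels and probabilities only.
import numpy as np

mapper = ProbabilityMapper(["logo", "signature", "photo"])
[result] = mapper.map_labels([np.array([0.1, 0.7, 0.2])])
assert result[ProbabilityMapperKeys.LABEL] == "signature"
assert result[ProbabilityMapperKeys.PROBABILITIES] == {"signature": 0.7, "photo": 0.2, "logo": 0.1}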

View File

@ -1,18 +0,0 @@
"""Defines constant paths relative to the module root path."""
from pathlib import Path
# FIXME: move these paths to config, depending only on the 'ROOT_PATH' environment variable.
MODULE_DIR = Path(__file__).resolve().parents[0]
PACKAGE_ROOT_DIR = MODULE_DIR.parents[0]
PROJECT_ROOT_DIR = PACKAGE_ROOT_DIR.parents[0]
CONFIG_FILE = PROJECT_ROOT_DIR / "config" / "settings.toml"
BANNER_FILE = PROJECT_ROOT_DIR / "banner.txt"
DATA_DIR = PROJECT_ROOT_DIR / "data"
MLRUNS_DIR = str(DATA_DIR / "mlruns")
TEST_DIR = PROJECT_ROOT_DIR / "test"
TEST_DATA_DIR = TEST_DIR / "data"
TEST_DATA_DIR_DVC = TEST_DIR / "data.dvc"

View File

@ -1,7 +0,0 @@
import abc
class DatabaseConnector(abc.ABC):
@abc.abstractmethod
def get_object(self, identifier):
raise NotImplementedError

View File

@ -1,9 +0,0 @@
from image_prediction.model_loader.database.connector import DatabaseConnector
class DatabaseConnectorMock(DatabaseConnector):
def __init__(self, store: dict):
self.store = store
def get_object(self, identifier):
return self.store[identifier]

View File

@ -1,18 +0,0 @@
from functools import lru_cache
from image_prediction.model_loader.database.connector import DatabaseConnector
class ModelLoader:
def __init__(self, database_connector: DatabaseConnector):
self.database_connector = database_connector
@lru_cache(maxsize=None)
def __get_object(self, identifier):
return self.database_connector.get_object(identifier)
def load_model(self, identifier):
return self.__get_object(identifier)["model"]
def load_classes(self, identifier):
return self.__get_object(identifier)["classes"]

View File

@ -1,10 +0,0 @@
from image_prediction.model_loader.database.connector import DatabaseConnector
from image_prediction.redai_adapter.mlflow import MlflowModelReader
class MlflowConnector(DatabaseConnector):
def __init__(self, mlflow_reader: MlflowModelReader):
self.mlflow_reader = mlflow_reader
def get_object(self, run_id):
return self.mlflow_reader[run_id]

Some files were not shown because too many files have changed in this diff.