Compare commits


156 Commits

Author SHA1 Message Date
Matthias Bisping
363d04ce5d Formatting 2023-02-28 14:03:02 +01:00
Matthias Bisping
510b39b537 Refactoring
Gets rid of `recursed` flag.
2023-02-28 13:54:21 +01:00
Matthias Bisping
223d3e6ed0 Refactoring
- Add visitor support to content rectangle
- Refactor box unpacking to happen by applying visitor pattern
2023-02-28 13:42:32 +01:00
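The visitor-based box unpacking this commit describes can be sketched roughly as follows. All class and method names here are illustrative assumptions, not the repository's actual API; the point is that recursion is driven by which `visit_*` method is dispatched, which is what makes a `recursed` flag unnecessary:

```python
# Hypothetical sketch of visitor-based box unpacking; names are
# illustrative, not the project's real classes.
class ContentRectangle:
    def accept(self, visitor):
        # Leaf node: dispatch to the plain-rectangle handler.
        return visitor.visit_rectangle(self)

class Table(ContentRectangle):
    def __init__(self, cells):
        self.cells = cells

    def accept(self, visitor):
        # Composite node: dispatch to the table handler.
        return visitor.visit_table(self)

class BoxUnpacker:
    """Collects leaf boxes; recursion happens implicitly through
    dispatch, so no `recursed` flag is needed."""

    def visit_rectangle(self, rect):
        return [rect]

    def visit_table(self, table):
        boxes = []
        for cell in table.cells:
            boxes.extend(cell.accept(self))
        return boxes
```

Unpacking a table nested inside another table then yields only the leaf rectangles, never the interior table itself.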
Matthias Bisping
9efa37ae87 Add fallback for when formula generation fails 2023-02-15 20:16:16 +01:00
Matthias Bisping
f9019d6625 Misc fixes 2023-02-15 19:50:14 +01:00
Matthias Bisping
66c65ce900 Integrate generation of structural formulas as plots 2023-02-15 19:31:00 +01:00
Matthias Bisping
0e7791394f Refactoring: Rename 2023-02-15 18:52:57 +01:00
Matthias Bisping
0f6e87b8a6 Refactoring
Move DVC fixture into data fixture module
2023-02-15 10:42:02 +01:00
Matthias Bisping
f12ef4b8ed stop tracking test/data 2023-02-15 10:17:26 +01:00
Matthias Bisping
7360226e98 Add random heatmap logic 2023-02-14 16:37:58 +01:00
Matthias Bisping
43688d0f0b Add filtering of interior tables in cell yielding
Interior tables are only an implementation detail and should not produce
target boxes. Instead, only their cells are yielded.
2023-02-14 16:19:10 +01:00
Matthias Bisping
effc69c42f Implement recursive child box (e.g. table cell) passthrough 2023-02-14 13:42:19 +01:00
Matthias Bisping
0be5849df1 Remove unused fixture parameters 2023-02-14 10:13:19 +01:00
Matthias Bisping
6a7cff5bf5 Remove superfluous docstring 2023-02-14 10:12:01 +01:00
Matthias Bisping
fc0f19c5f1 Formatting 2023-02-14 10:09:46 +01:00
Matthias Bisping
fdbc49ccba Remove obsolete import 2023-02-14 10:07:45 +01:00
Matthias Bisping
61371153f6 Default to random font instead of monospace default font in font picking function 2023-02-14 10:07:18 +01:00
Matthias Bisping
86bd96db67 Remove line number in front of drawn text lines 2023-02-13 18:48:29 +01:00
Matthias Bisping
46146cc886 Change default value of number of sentences to generate 2023-02-13 18:44:55 +01:00
Matthias Bisping
be0c643f75 Fix (sort of) words crossing right text box edge 2023-02-13 18:44:10 +01:00
Matthias Bisping
4ec7cb8d7b Fix drawing of lines beyond text box
Also:
	- Extend kwargs for logging decorators
	- Better line wrapping
2023-02-13 18:43:01 +01:00
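The line-wrapping fix described above (keeping words from crossing the right text-box edge) amounts to a width-aware greedy wrap. A minimal sketch, where `measure` is a stand-in for a real font metric (the project presumably measures rendered pixel widths; this is not its actual code):

```python
# Greedy width-aware line wrapping: a word that would cross the right
# edge starts a new line instead. `measure` defaults to character count
# here purely for illustration.
def wrap_words(words, max_width, measure=len):
    lines, current = [], []
    for word in words:
        candidate = " ".join(current + [word])
        if current and measure(candidate) > max_width:
            lines.append(" ".join(current))  # close the full line
            current = [word]                 # word starts the next line
        else:
            current.append(word)
    if current:
        lines.append(" ".join(current))
    return lines
```

With a pixel-based `measure` (e.g. a font's text-length function) the same loop keeps every rendered line inside the text box.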
Matthias Bisping
dcdfe03f43 Replace logger import 2023-02-13 17:22:45 +01:00
Matthias Bisping
77c86078eb Fix kwargs bug in font picking logic 2023-02-13 17:22:23 +01:00
Matthias Bisping
e952d19c68 Apply log decorator 2023-02-13 17:22:01 +01:00
Matthias Bisping
2bcac91dea Add scalene 2023-02-13 17:21:45 +01:00
Matthias Bisping
7facedb38a Add logging utils 2023-02-13 17:21:37 +01:00
Matthias Bisping
3113d5cb5d Refactoring
Squashed commit of the following:

commit e5832a17356cebd43846c0542ce595bba5a8cdda
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Mon Feb 13 14:08:17 2023 +0100

    reduce pytest parameter combinations

commit a1e6c9e553545ed1fc4c017e67dddaa98fc2a1c9
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:56:16 2023 +0100

    clear color map cache per pytest parameter combination

commit 21a9db25cdb55b967c664f5d129a9ac35aa1da0f
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:38:52 2023 +0100

    Remove obsolete line

commit 90c367cc325dd3a4d3b8f7f37e06a79c30207867
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:38:05 2023 +0100

    Refactoring: Move

commit 42d285e35b82ba0f36835eff6ff70c50bd80d20c
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:33:44 2023 +0100

    Refactoring: Move

    Move content generator into its own module

commit ddc92461d7442e08921408707ada6963f555f708
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:29:59 2023 +0100

    Refactoring: Move

    Move remaining segment generation functions into segments module

commit d2cb78d38f47a8c705a82dd725e24c0540a29710
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:27:26 2023 +0100

    Refactoring: Move

    Move zipmap and evert_nth into utils module

commit 9c401a977ce0749463cb2af509f412007f37a084
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:26:01 2023 +0100

    Refactoring: Move

    Move rectangle shrinking logic into new morphing module

commit b77951d4feb1e5dacdb32f0d36a399f6f94b2293
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:22:15 2023 +0100

    Refactoring: Move

    Move segment generation functions into their own module

commit c7b224a98a355f93653a0d576a10fbd2507ed1d8
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:14:54 2023 +0100

    Refactoring: Move

    Move cell class into its own module

commit f0072b0852f34f0448d467fc4993eee3a23a6c5b
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:12:18 2023 +0100

    Refactoring: Move

    Move table generation related code into new table module

commit 9fd87aff8ea69404959056b3d58c7f8856527c83
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 18:07:36 2023 +0100

    Refactoring: Move

    - Move random plot into its own module
    - Move geometric predicates into their own module

commit 6728642a4fc07ec9c47db99efe12981c18f95ee5
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:59:54 2023 +0100

    Refactoring: Move

    Move random helper functions

commit cc86a79ac7bc47e5ddb68e5c95327eebc97041d9
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:33:51 2023 +0100

    Refactoring: Move

    Move text block generator module into text module

commit 160d5b3473d7e4f6f6dbb8fcf51cf554d6b54543
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:29:29 2023 +0100

    Remove unused code

commit 7b2f921472bb47b5c5d7848393ae471664eab583
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:28:17 2023 +0100

    Refactoring: Move

    Move text block generators into their own module

commit e258df899f4be39beec4a0bfc01eaea105218adb
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:24:54 2023 +0100

    Refactoring: Move

    Move text block into its own module

commit cef97b33f920488857c308e6ebcbc5a309de4b20
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:20:30 2023 +0100

    Refactoring: Move

    Move page partitioners into partitioner module

commit a54ccb2fdf44595720718fef44d5d3b1b8cbfe0a
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:15:40 2023 +0100

    Refactoring: Move

    Move text generation functions into their own module

commit 1de938f2faa50cb805d7ebea3075c1d6d969d254
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:07:33 2023 +0100

    Refactoring: Move

    Move font related functions into font module

commit de9b3bad93d91b2d1820b59403fc357e243238e6
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 17:05:47 2023 +0100

    Refactoring: Move

    Move font picker into new font module

commit 9480d58a8a77b3feb7206cb1b7ac5c8a25516b39
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:59:31 2023 +0100

    Refactoring: Move

    Move line formatters into their own module

commit cc0094d3f73b258a0b89353981529e7fa6978b53
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:54:08 2023 +0100

    Refactoring: Move

    Move random content rectangle into its own module

commit 93a52080df8f5aa39b3b29f2c9a8dcbc8d72ad9d
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:52:57 2023 +0100

    Remove unused code

commit 4ec3429dec932cadd828376610950b8ad84a51f4
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:51:03 2023 +0100

    Refactoring: Move

    Move page partitioner into its own module

commit bdcb2f1bef36357ea048c4f00b9dccfa25b13bd9
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:42:55 2023 +0100

    Refactoring: Move

commit 845d1691949dcba049737af29fcee735825ecb8f
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:39:39 2023 +0100

    Refactoring

commit 56c10490b965ccf3ca81aa9ba0403d9068871688
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:36:21 2023 +0100

    Refactoring

commit 740a9cb3c25710a46452fa28dbef011daa03d6ed
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:33:32 2023 +0100

    Refactoring

commit b3cf3e44548c71e7eff90e94ce8ce671a0d8f343
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:29:03 2023 +0100

    Refactoring

    Add fixture for page partitioner

commit 2fb450943e74d0a2a49ca0e20c9507d0230e4373
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:25:50 2023 +0100

    Refactoring: Move

commit fd76933b5ac1fbab1b508ef1f3f199d04189cf81
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:16:16 2023 +0100

    Refactoring: Move

    Move image operations such as blurring into their own module.

commit 809590054315266286c75fb0ef2f81b506aaf20c
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 16:10:48 2023 +0100

    Fix effectless bug

commit d42f053c81105e3144fcc54a7c6e924c777b3665
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 13:22:16 2023 +0100

    Refactoring: Re-order

commit 04a617b9df0ee62e73f87508c8b09c4d3817a6e3
Author: Matthias Bisping <matthias.bisping@axbit.com>
Date:   Wed Feb 1 13:19:25 2023 +0100

    Refactoring

    Move content rectangle base class
2023-02-13 14:12:34 +01:00
Matthias Bisping
ba901473fe Set alpha in box frame drawing logic 2023-02-01 13:12:53 +01:00
Matthias Bisping
e8b4467265 Remove unused code 2023-02-01 13:12:20 +01:00
Matthias Bisping
4c65d906b8 Add fixme 2023-02-01 11:53:35 +01:00
Matthias Bisping
667b4a4858 Refactoring and text cell content tweaking 2023-02-01 11:32:37 +01:00
Matthias Bisping
83e6dc3ce7 Add IPython dev dependency 2023-02-01 11:32:12 +01:00
Matthias Bisping
fb69eb7f5c Refactoring
Break up conditional tree in cell building function
2023-02-01 10:09:32 +01:00
Matthias Bisping
f98256d7e9 Fix bug in table generation
- Replace the check `elif size < Size.LARGE.value` with `else`, since it
was intended to cover all cells larger than medium size

- Also disable page number generation for now
2023-02-01 09:58:34 +01:00
Matthias Bisping
cbb3a8cc61 [WIP] Page numbers 2023-01-31 17:09:59 +01:00
Matthias Bisping
9f9face8f0 Make scatterplots more variable 2023-01-31 16:33:48 +01:00
Matthias Bisping
f2af040c5b [WIP] texture and content blending with blend_modes module 2023-01-31 16:05:08 +01:00
Matthias Bisping
6dbe3b6fc9 Refactoring 2023-01-31 14:37:46 +01:00
Matthias Bisping
a3fece8096 Found first issue for pale colors 2023-01-31 14:16:33 +01:00
Matthias Bisping
26180373a0 Remove unused imports 2023-01-31 13:55:13 +01:00
Matthias Bisping
186b4530f0 [WIP] Make texture show through page content 2023-01-31 13:53:52 +01:00
Matthias Bisping
a1ccda4ea9 Refactoring 2023-01-30 14:11:48 +01:00
Matthias Bisping
25d35e2349 Fix bug in booktabs code 2023-01-25 20:55:19 +01:00
Matthias Bisping
daea7d2bf7 [WIP] head and bottom border (booktabs-like) for tables 2023-01-25 20:18:09 +01:00
Matthias Bisping
d5e501a05d Tweak plots 2023-01-25 19:44:54 +01:00
Matthias Bisping
d9d363834a Tweak plots and table cells
- Choice of plot now depends on the aspect ratio of the rectangle and is
handled in the plot constructor

- Made pie charts more diverse

- Table cell background is now a complementary color chosen against the
  colormap
2023-01-25 19:13:43 +01:00
Matthias Bisping
5dc13e7137 [WIP] More table / cell edge fiddling and issue fixing
Fix: The cell width and height were rounded to int in the table
constructor. The imprecision of rounding would accumulate when stacking
cells in a row or column, leading to gaps at the bottom and right-hand
edge of tables.
The rounding has now been removed and is left to the cell constructor.
Cells are derived from the Rectangle class, which does the rounding
itself. This eliminates the accumulated gaps in the tables.
2023-01-25 18:16:36 +01:00
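The accumulation problem this commit describes is easy to reproduce with assumed numbers (not taken from the repository): rounding each cell width before stacking loses a fraction of a pixel per cell, while rounding the cell *edges* keeps the total exact.

```python
# Illustrative numbers only: a 100 px wide table with 7 columns.
table_width = 100
n_cols = 7
cell_w = table_width / n_cols  # ~14.2857 px, not an integer

# Rounding each width first and then stacking accumulates the error:
stacked = sum([round(cell_w)] * n_cols)  # 7 * 14 = 98 -> a 2 px gap

# Rounding the cumulative cell edges instead keeps the total exact,
# which is what deferring the rounding to the Rectangle-derived cell
# constructor achieves:
edges = [round(i * cell_w) for i in range(n_cols + 1)]
```

`edges[-1]` lands exactly on the table's right edge, so no gap accumulates regardless of the column count.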
Matthias Bisping
826cd3b6a9 [WIP] More table / cell edge fiddling and issue fixing 2023-01-25 17:23:30 +01:00
Matthias Bisping
4f788af35b [WIP] More table / cell edge fiddling and issue fixing
Cells now draw only inner borders and the table draws the outer border
if the layout is "closed". This avoids multiple lines around cells of
nested tables, since nested tables are now created with the layout
parameter set to "open", in which case the table does not draw its
borders.
2023-01-25 10:31:17 +01:00
Matthias Bisping
10ea584143 [WIP] More table / cell edge fiddling and issue fixing 2023-01-24 15:44:24 +01:00
Matthias Bisping
7676a8148e [WIP] More table / cell edge fiddling and issue fixing 2023-01-24 13:53:59 +01:00
Matthias Bisping
cee5e69a4b Make page generation reproducible
Tie all structural random events to a seeded random object.
2023-01-24 13:07:45 +01:00
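The approach named in the commit body, tying all structural random events to one seeded random object, can be sketched like this (function and parameter names are hypothetical, not the project's API):

```python
import random

# All structural random choices draw from a single seeded Random
# instance instead of the module-level random functions, so the same
# seed reproduces the same page layout.
def generate_layout(seed: int, n_boxes: int = 5):
    rng = random.Random(seed)  # the one source of randomness
    return [(rng.randint(0, 800), rng.randint(0, 600))
            for _ in range(n_boxes)]
```

Because no other code touches the generator's state, two runs with the same seed produce identical box positions.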
Matthias Bisping
e715c86f8d Fix box clashes
Rewrote box generation sequence and eliminated issue with gaps /
overlapping boxes
2023-01-24 12:07:47 +01:00
Matthias Bisping
c5ba489931 Refactoring 2023-01-24 10:49:38 +01:00
Matthias Bisping
3772ca021a Refactoring
Pull the base class for the page partitioner out of the page partitioner
and add a derived random page partitioner class.
2023-01-24 10:13:03 +01:00
Matthias Bisping
c4eeb956ca Fix incorrect font size kwarg 2023-01-24 10:12:44 +01:00
Matthias Bisping
d823ebf7c6 Refactoring
Refactor page partitioner
2023-01-24 10:00:35 +01:00
Matthias Bisping
71ffb28381 Re-add actual block generator calls back 2023-01-23 15:38:20 +01:00
Matthias Bisping
9dfbe9a142 Add different basic table layouts 2023-01-23 15:30:12 +01:00
Matthias Bisping
0eb57056ba Fix / improve table cell border drawing 2023-01-23 15:16:26 +01:00
Matthias Bisping
70802d6341 Fix error in image padding logic 2023-01-23 14:38:42 +01:00
Matthias Bisping
52776494cb Tweak font selection 2023-01-23 14:15:41 +01:00
Matthias Bisping
7d8842b4ac Refactoring & Add table captions 2023-01-23 13:14:48 +01:00
Matthias Bisping
9e77e25afb Refactoring
Move text block generation code into its own class.
2023-01-23 12:22:42 +01:00
Matthias Bisping
b3480491be Refactoring
Move line formatting code into its own class.
2023-01-23 12:13:22 +01:00
Matthias Bisping
3d0c2396ee Remove obsolete code 2023-01-23 12:05:01 +01:00
Matthias Bisping
f8c2d691b2 [WIP] Figure captions 2023-01-23 12:04:27 +01:00
Matthias Bisping
ced1cd9559 More tables less text 2023-01-18 19:51:13 +01:00
Matthias Bisping
738c51a337 Merge branch 'refactoring' of git+ssh://git.iqser.com:2222/rr/cv-analysis into refactoring 2023-01-18 19:25:24 +01:00
Matthias Bisping
48f6aebc13 Tweaking 2023-01-18 19:22:48 +01:00
Matthias Bisping
73d546367c Tweaking 2023-01-18 18:53:56 +01:00
Matthias Bisping
cfe4b58e38 Add option to put specific text into text block 2023-01-18 17:19:12 +01:00
Matthias Bisping
839a264816 Add option to put specific text into text block 2023-01-18 17:18:56 +01:00
Matthias Bisping
fd57fe99b7 Tweak content selection logic 2023-01-18 16:56:25 +01:00
Matthias Bisping
5e51fd1d10 Tweak content selection logic 2023-01-18 16:17:20 +01:00
Matthias Bisping
9c7c5e315f Select cell content conditioned on cell size class 2023-01-18 15:54:24 +01:00
Matthias Bisping
3da613af94 [WIP] recursive random table: Add recursive construction 2023-01-18 15:04:04 +01:00
Matthias Bisping
30e6350881 [WIP] recursive random table
Add padding between cell content and cell border
2023-01-18 14:50:15 +01:00
Matthias Bisping
384f0e5f28 [WIP] recursive random table: tweak cell border and fill logic 2023-01-18 13:42:38 +01:00
Matthias Bisping
4d181448b6 [WIP] recursive random table: basic version working 2023-01-18 13:30:19 +01:00
Matthias Bisping
a5cd3d6ec9 [WIP] recursive random table 2023-01-18 13:11:15 +01:00
Matthias Bisping
893622a73e [WIP] recursive random table 2023-01-18 11:45:19 +01:00
Matthias Bisping
4d11a157e5 Cache font selection 2023-01-18 09:39:04 +01:00
Matthias Bisping
4c10d521e2 [WIP] random font selection 2023-01-17 14:58:54 +01:00
Matthias Bisping
0f6cbec1d5 Refactoring 2023-01-17 13:43:12 +01:00
Matthias Bisping
54484d9ad0 [WIP] random table segments: Table via tabulate and text -> image 2023-01-17 13:23:53 +01:00
Matthias Bisping
ca190721d6 [WIP] random table segments & refactoring 2023-01-17 13:17:33 +01:00
Matthias Bisping
5611314ff3 [WIP] random table segments 2023-01-17 11:42:11 +01:00
Matthias Bisping
4ecfe16df5 Constrain possible random layouts 2023-01-17 11:12:25 +01:00
Matthias Bisping
38c0614396 Assign box type by box aspect ratio 2023-01-17 10:59:53 +01:00
Matthias Bisping
64565f9cb0 Complete first iteration of random plot generation 2023-01-17 10:55:09 +01:00
Matthias Bisping
232c6bed4b Refactoring: Rename 2023-01-17 09:54:50 +01:00
Matthias Bisping
8d34873d1c [WIP] random plot segments 2023-01-16 19:33:46 +01:00
Matthias Bisping
78a951a319 [WIP] random plot segments 2023-01-16 18:42:34 +01:00
Matthias Bisping
8d57d2043d [WIP] random text segments 2023-01-16 18:18:22 +01:00
Matthias Bisping
41fdda4955 [WIP] random text segments 2023-01-16 17:55:20 +01:00
Matthias Bisping
4dfdd579a2 [WIP] random text segments 2023-01-16 17:41:30 +01:00
Matthias Bisping
e831ab1382 [WIP] random text segments 2023-01-16 17:17:50 +01:00
Matthias Bisping
6fead2d9b9 [WIP] random text segments 2023-01-16 16:34:18 +01:00
Matthias Bisping
1012988475 Remove obsolete code 2023-01-16 13:35:59 +01:00
Matthias Bisping
5bc1550eae Complete page partitioning into empty boxes
Completed logic for partitioning page into content boxes. Next step is
to fill content boxes with random content.
2023-01-16 13:32:38 +01:00
Matthias Bisping
29741fc5da [WIP] random content box generation 2023-01-16 12:07:56 +01:00
Matthias Bisping
4772e3037c Remove obsolete code 2023-01-16 11:16:27 +01:00
Matthias Bisping
dd6ab94aa2 [WIP] Replace texture generation with loading textures from files 2023-01-16 10:59:13 +01:00
Matthias Bisping
eaca8725de Balance colors of base textures
Make base textures more similar in color balance
2023-01-16 10:19:05 +01:00
Matthias Bisping
4af202f098 Add base paper textures 2023-01-16 10:09:34 +01:00
Matthias Bisping
1199845cdf Refactoring: Rename 2023-01-16 08:47:45 +01:00
Matthias Bisping
4578413748 Improve page texture logic 2023-01-11 14:05:39 +01:00
Matthias Bisping
d5d67cb064 Fix image format (RGB/A, float/uint8, [0, 1/255]) issues 2023-01-11 12:17:07 +01:00
Matthias Bisping
d8542762e6 [WIP] Add augmentation pipeline to page generation 2023-01-10 17:13:39 +01:00
Matthias Bisping
caef416077 Tweak page generation 2023-01-10 16:37:35 +01:00
Matthias Bisping
a8708ffc56 [WIP] page generation for tests 2023-01-10 16:31:02 +01:00
Matthias Bisping
3f0bbf0fc7 Refactoring 2023-01-10 11:59:01 +01:00
Matthias Bisping
2fec39eda6 Add docstring 2023-01-10 11:31:13 +01:00
Matthias Bisping
16cc0007ed Refactoring 2023-01-10 11:30:36 +01:00
Matthias Bisping
3d83489819 Refactoring: Make single pass rectangle merging stateless 2023-01-10 11:14:15 +01:00
Matthias Bisping
3134021596 Add typehints 2023-01-10 10:20:07 +01:00
Matthias Bisping
3cb857d830 Refactoring: Move 2023-01-10 10:19:49 +01:00
Matthias Bisping
194102939e Refactoring
- add typehints
- other minor refactorings
2023-01-10 10:10:08 +01:00
Matthias Bisping
5d1d9516b5 Add fixmes and format docstring 2023-01-10 09:39:23 +01:00
Matthias Bisping
77f85e9de1 Refactoring
Various
2023-01-09 17:22:01 +01:00
Matthias Bisping
c00081b2bc Refactoring: Move 2023-01-09 17:01:36 +01:00
Matthias Bisping
619f67f1fd Refactoring
Various
2023-01-09 16:51:58 +01:00
Matthias Bisping
a97f8def7c Refactor metrics 2023-01-09 16:22:52 +01:00
Matthias Bisping
65e9735bd9 Refactor metrics 2023-01-09 15:53:53 +01:00
Matthias Bisping
689be75478 Refactoring 2023-01-09 15:47:12 +01:00
Matthias Bisping
acf46a7a48 [WIP] Refactoring meta-detection 2023-01-09 15:40:32 +01:00
Matthias Bisping
0f11441b20 [WIP] Refactoring meta-detection 2023-01-09 15:32:51 +01:00
Matthias Bisping
fa1fa15cc8 [WIP] Refactoring meta-detection 2023-01-09 15:05:00 +01:00
Matthias Bisping
17c40c996a [WIP] Refactoring meta-detection 2023-01-09 14:44:22 +01:00
Matthias Bisping
99af2943b5 [WIP] Refactoring meta-detection 2023-01-09 14:33:27 +01:00
Matthias Bisping
0e6cb495e8 [WIP] Refactoring meta-detection 2023-01-09 14:29:22 +01:00
Matthias Bisping
012e705e70 [WIP] Refactoring meta-detection 2023-01-09 14:22:18 +01:00
Matthias Bisping
8327794685 [WIP] Refactoring meta-detection 2023-01-09 13:42:23 +01:00
Matthias Bisping
72bc52dc7b [WIP] Refactoring meta-detection 2023-01-09 13:27:26 +01:00
Matthias Bisping
557d091a54 [WIP] Refactoring meta-detection 2023-01-09 12:03:50 +01:00
Matthias Bisping
b540cfd0f2 [WIP] Refactoring meta-detection 2023-01-09 11:38:55 +01:00
Matthias Bisping
8824c5c3ea Refactoring 2023-01-09 11:33:38 +01:00
Matthias Bisping
94e9210faf Refactoring
Various
2023-01-09 11:21:43 +01:00
Matthias Bisping
06d6863cc5 Format docstrings 2023-01-04 18:50:27 +01:00
Matthias Bisping
dfd87cb4b0 Refactoring 2023-01-04 18:29:52 +01:00
Matthias Bisping
cd5457840b Refactoring
Various
2023-01-04 18:13:54 +01:00
Matthias Bisping
eee2f0e256 Refactoring
Rename module
2023-01-04 17:40:43 +01:00
Matthias Bisping
9d2f166fbf Refactoring
Various
2023-01-04 17:36:06 +01:00
Matthias Bisping
97fb4b645d Refactoring
Remove more code that is not adhering to separation of concerns from Rectangle class
2023-01-04 16:49:44 +01:00
Matthias Bisping
00e53fb54d Refactoring
Remove code that is not adhering to separation of concerns from Rectangle class
2023-01-04 16:29:43 +01:00
Matthias Bisping
4be91de036 Refactoring
Further clean up Rectangle class
2023-01-04 15:26:39 +01:00
Matthias Bisping
8c6b940364 Refactoring
Clean up Rectangle class
2023-01-04 14:57:47 +01:00
Matthias Bisping
cdb12baccd Format docstrings 2023-01-04 13:57:51 +01:00
Matthias Bisping
ac84494613 Refactoring 2023-01-04 13:32:57 +01:00
Matthias Bisping
77f565c652 Fix
Fix a typehint
Fix a bug that would happen when a generator is passed
2023-01-04 12:06:28 +01:00
Matthias Bisping
47e657aaa3 Refactoring
Clean up and prove correctness of intersection computation
2023-01-04 12:05:57 +01:00
Matthias Bisping
b592497b75 Refactoring 2023-01-04 10:58:24 +01:00
Matthias Bisping
c0d961bc39 Merge branch 'poetrify' into refactoring 2023-01-04 10:12:50 +01:00
Matthias Bisping
8260ae58f9 Refactoring
Make adjacency checking code clean
2023-01-04 10:11:46 +01:00
Matthias Bisping
068f75d35b Apply black 2023-01-04 10:11:28 +01:00
351 changed files with 8377 additions and 160692 deletions


@@ -10,7 +10,7 @@ omit =
*/build_venv/*
*/incl/*
source =
cv_analysis
cv_analysis
relative_files = True
data_file = .coverage
@@ -46,4 +46,4 @@ ignore_errors = True
directory = reports
[xml]
output = reports/coverage.xml
output = reports/coverage.xml


@@ -97,4 +97,4 @@ target/
*.swp
*/*.swp
*/*/*.swp
*/*/*/*.swp
*/*/*/*.swp


@@ -1,10 +1,7 @@
[core]
remote = azure_remote
remote = vector
autostage = true
['remote "vector"']
url = ssh://vector.iqser.com/research/nonml_cv_doc_parsing/
port = 22
['remote "azure_remote"']
url = azure://cv-sa-dvc/
connection_string = "DefaultEndpointsProtocol=https;AccountName=cvsacricket;AccountKey=KOuTAQ6Mp00ePTT5ObYmgaHlxwS1qukY4QU4Kuk7gy/vldneA+ZiKjaOpEFtqKA6Mtym2gQz8THy+ASts/Y1Bw==;EndpointSuffix=core.windows.net"
['remote "local"']
url = ../dvc_local_remote

.gitignore vendored

@@ -1,52 +1,27 @@
# Environments
.env
.venv
env/
venv/
.pytest*
.python-version
.DS_Store
# Project folders
scratch/
*.vscode/
.idea
*_app
*pytest_cache
*joblib
*tmp
*profiling
*logs
*docker
*drivers
*bamboo-specs/target
# Python specific files
__pycache__/
*.py[cod]
*.ipynb
*.ipynb_checkpoints
# file extensions
*.log
*.csv
*.json
*.pkl
*.profile
*.cbm
# temp files
*.swp
*~
*.un~
# keep files
!notebooks/*.ipynb
# keep folders
!secrets
!data/*
!drivers
# unignore files
!bom.*
*.egg-info/
deskew_model/
build_venv/
/pdfs/
/results/
/pdfs/
/env/
/.idea/
/.idea/.gitignore
/.idea/misc.xml
/.idea/inspectionProfiles/profiles_settings.xml
/.idea/table_parsing.iml
/.idea/vcs.xml
/results/
/table_parsing.egg-info
/target/
/tests/
/cv_analysis.egg-info/dependency_links.txt
/cv_analysis.egg-info/PKG-INFO
/cv_analysis.egg-info/SOURCES.txt
/cv_analysis.egg-info/top_level.txt
/.vscode/
/cv_analysis/test/test_data/example_pages.json
/data/metadata_testing_files.csv
.coverage
/data/


@@ -1,30 +0,0 @@
include:
- project: "Gitlab/gitlab"
ref: 0.3.0
file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"
variables:
NEXUS_PROJECT_DIR: red
IMAGENAME: "${CI_PROJECT_NAME}"
#################################
# temp. disable integration tests, b/c they don't cover the CV analysis case yet
trigger integration tests:
rules:
- when: never
release build:
stage: release
needs:
- job: set custom version
artifacts: true
optional: true
- job: calculate patch version
artifacts: true
optional: true
- job: calculate minor version
artifacts: true
optional: true
- job: build docker nexus
artifacts: true
#################################


@@ -1,35 +0,0 @@
# CI for services, check gitlab repo for python package CI
include:
- project: "Gitlab/gitlab"
ref: main
file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
- project: "Gitlab/gitlab"
ref: main
file: "/ci-templates/research/docs.gitlab-ci.yml"
# set project variables here
variables:
NEXUS_PROJECT_DIR: red # subfolder in Nexus docker-gin where your container will be stored
IMAGENAME: $CI_PROJECT_NAME # if the project URL is gitlab.example.com/group-name/project-1, CI_PROJECT_NAME is project-1
pages:
only:
- master # KEEP THIS, necessary because `master` branch and not `main` branch
###################
# INTEGRATION TESTS
trigger-integration-tests:
extends: .integration-tests
# ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
# needs:
# - job: docker-build::model_name
# artifacts: true
rules:
- when: never
#########
# RELEASE
release:
extends: .release
needs:
- !reference [.needs-versioning, needs] # leave this line as is


@@ -1,61 +0,0 @@
import subprocess
import sys
from pathlib import Path
import semver
from loguru import logger
from semver.version import Version
logger.remove()
logger.add(sys.stdout, level="INFO")
def bashcmd(cmds: list) -> str:
try:
logger.debug(f"running: {' '.join(cmds)}")
return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
except:
logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
raise
def get_highest_existing_git_version_tag() -> str:
"""Get highest versions from git tags depending on bump level"""
try:
git_tags = bashcmd(["git", "tag", "-l"]).split()
semver_compat_tags = list(filter(Version.is_valid, git_tags))
highest_git_version_tag = max(semver_compat_tags, key=semver.version.Version.parse)
logger.info(f"Highest git version tag: {highest_git_version_tag}")
return highest_git_version_tag
except:
logger.warning("Error getting git version tags")
raise
def auto_bump_version() -> bool:
active = Path(".autoversion").is_file()
logger.debug(f"Automated version bump is set to '{active}'")
return active
def main() -> None:
poetry_project_version = bashcmd(["poetry", "version", "-s"])
logger.info(f"Poetry project version: {poetry_project_version}")
highest_git_version_tag = get_highest_existing_git_version_tag()
comparison_result = semver.compare(poetry_project_version, highest_git_version_tag)
if comparison_result in (-1, 0):
logger.warning("Poetry version must be greater than git tag version.")
if auto_bump_version():
logger.info(bashcmd(["poetry", "version", highest_git_version_tag]))
sys.exit(0)
sys.exit(1)
else:
logger.info(f"All good: {poetry_project_version} > {highest_git_version_tag}")
if __name__ == "__main__":
main()


@@ -1,72 +0,0 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
exclude: ^(docs/|notebooks/|data/|src/configs/|tests/|.hooks/|bom.json)
default_language_version:
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
args: [--unsafe] # needed for .gitlab-ci.yml
- id: check-toml
- id: detect-private-key
- id: check-added-large-files
args: ['--maxkb=10000']
- id: check-case-conflict
- id: mixed-line-ending
# - repo: https://github.com/pre-commit/mirrors-pylint
# rev: v3.0.0a5
# hooks:
# - id: pylint
# args:
# - --disable=C0111,R0903,E0401
# - --max-line-length=120
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
- id: isort
args:
- --profile black
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
- id: black
# exclude: ^(docs/|notebooks/|data/|src/secrets/)
args:
- --line-length=120
- repo: https://github.com/compilerla/conventional-pre-commit
rev: v4.0.0
hooks:
- id: conventional-pre-commit
pass_filenames: false
stages: [commit-msg]
# args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test]
- repo: local
hooks:
- id: version-checker
name: version-checker
entry: python .hooks/poetry_version_check.py
language: python
always_run: true
additional_dependencies:
- "semver"
- "loguru"
# - repo: local
# hooks:
# - id: docker-build-test
# name: testing docker build
# entry: ./scripts/ops/docker-compose-build-run.sh
# language: script
# # always_run: true
# pass_filenames: false
# args: []
# stages: [pre-commit]


@@ -1,78 +1,30 @@
###############
# BUILDER IMAGE
FROM python:3.10-slim as builder
FROM python:3.10
ARG GITLAB_USER
ARG GITLAB_ACCESS_TOKEN
RUN python -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
RUN python -m pip install --upgrade pip
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
ARG POETRY_SOURCE_REF_RED=gitlab-red
WORKDIR /app/service
ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
COPY ./requirements.txt ./requirements.txt
RUN python3 -m pip install -r requirements.txt
ARG VERSION=dev
COPY ./incl/pyinfra/requirements.txt ./incl/pyinfra/requirements.txt
RUN python -m pip install -r incl/pyinfra/requirements.txt
LABEL maintainer="Research <research@knecon.com>"
LABEL version="${VERSION}"
COPY ./incl/pdf2image/requirements.txt ./incl/pdf2image/requirements.txt
RUN python -m pip install -r incl/pdf2image/requirements.txt
WORKDIR /app
COPY ./incl ./incl
###########
# ENV SETUP
ENV PYTHONDONTWRITEBYTECODE=true
ENV PYTHONUNBUFFERED=true
ENV POETRY_HOME=/opt/poetry
ENV PATH="$POETRY_HOME/bin:$PATH"
RUN python3 -m pip install -e incl/pyinfra
RUN python3 -m pip install -e incl/pdf2image
RUN apt-get update && \
apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN curl -sSL https://install.python-poetry.org | python3 -
RUN poetry --version
COPY pyproject.toml poetry.lock ./
RUN poetry config virtualenvs.create true && \
poetry config virtualenvs.in-project true && \
poetry config installer.max-workers 10 && \
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry install --without=dev,docs,test -vv --no-interaction --no-root
##################
# COPY SOURCE CODE
COPY ./config ./config
COPY ./src ./src
COPY ./cv_analysis ./cv_analysis
COPY ./setup.py ./setup.py
###############
# WORKING IMAGE
FROM python:3.10-slim
RUN python3 -m pip install -e .
# COPY BILL OF MATERIALS (BOM)
COPY bom.json /bom.json
# COPY SOURCE CODE FROM BUILDER IMAGE
COPY --from=builder /app /app
WORKDIR /app
ENV PATH="/app/.venv/bin:$PATH"
############
# NETWORKING
EXPOSE 5000
EXPOSE 8080
################
# LAUNCH COMMAND
CMD [ "python", "src/serve.py"]
CMD ["python3", "-u", "src/serve.py"]

@@ -1,94 +0,0 @@
.PHONY: \
poetry in-project-venv dev-env use-env install install-dev tests \
update-version sync-version-with-git \
docker docker-build-run docker-build docker-run \
docker-rm docker-rm-container docker-rm-image \
pre-commit get-licenses requirements prep-commit \
docs sphinx_html sphinx_apidoc bom
.DEFAULT_GOAL := run
export DOCKER=docker
export DOCKERFILE=Dockerfile
export IMAGE_NAME=cv_analysis_service-image
export CONTAINER_NAME=cv_analysis_service-container
export HOST_PORT=9999
export CONTAINER_PORT=9999
export PYTHON_VERSION=python3.10
# all commands should be executed in the root dir of the project,
# specific environments should be deactivated
poetry: in-project-venv use-env dev-env
in-project-venv:
poetry config virtualenvs.in-project true
use-env:
poetry env use ${PYTHON_VERSION}
dev-env:
poetry install --with dev && poetry update
install:
poetry add $(pkg)
install-dev:
poetry add --dev $(pkg)
requirements:
poetry export --without-hashes --output requirements.txt
update-version:
poetry version prerelease
sync-version-with-git:
git pull -p && poetry version $$(git describe --tags --abbrev=0 $$(git rev-list --tags --max-count=1))
bom:
cyclonedx-py poetry -o bom.json
docker: docker-rm docker-build-run
docker-build-run: docker-build docker-run
docker-build:
$(DOCKER) build \
--no-cache --progress=plain \
-t $(IMAGE_NAME) -f $(DOCKERFILE) \
--build-arg USERNAME=${USERNAME} \
--build-arg TOKEN=${GITLAB_TOKEN} \
.
docker-run:
$(DOCKER) run -it --rm -p $(HOST_PORT):$(CONTAINER_PORT)/tcp --name $(CONTAINER_NAME) $(IMAGE_NAME)
docker-rm: docker-rm-container docker-rm-image
docker-rm-container:
-$(DOCKER) rm $(CONTAINER_NAME)
docker-rm-image:
-$(DOCKER) image rm $(IMAGE_NAME)
tests:
poetry run pytest ./tests
prep-commit: docs get-licenses sync-version-with-git update-version pre-commit
pre-commit:
pre-commit run --all-files
get-licenses:
pip-licenses --format=json --order=license --with-urls > pkg-licenses.json
docs: sphinx_apidoc sphinx_html
sphinx_html:
poetry run sphinx-build -b html docs/source/ docs/build/html -E -a
sphinx_apidoc:
cp ./README.md ./docs/source/README.md && cp -r ./data ./docs/source/data/ && poetry run sphinx-apidoc ./src -o ./docs/source/modules --no-toc --module-first --follow-links --separate --force

@@ -1,60 +1,8 @@
# cv-analysis - Visual (CV-Based) Document Parsing
# cv-analysis &mdash; Visual (CV-Based) Document Parsing
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.
## API
Input message:
```json
{
"targetFilePath": {
"pdf": "absolute file path",
"vlp_output": "absolute file path"
},
"responseFilePath": "absolute file path",
"operation": "table_image_inference"
}
```
Response is uploaded to the storage as specified in the `responseFilePath` field. The structure is as follows:
```json
{
  ...,
  "data": [
    {
      "pageNum": 0,
      "bbox": {
        "x1": 55.3407,
        "y1": 247.0246,
        "x2": 558.5602,
        "y2": 598.0585
      },
      "uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84",
      "label": "table",
      "tableLines": [
        {
          "x1": 0,
          "y1": 16,
          "x2": 1399,
          "y2": 16
        },
        ...
      ],
      "imageInfo": {
        "height": 693,
        "width": 1414
      }
    },
    ...
  ]
}
```
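As a sketch of the schema above (field names follow the example; the file paths here are hypothetical placeholders), a request message could be assembled like this:

```python
import json

def build_request(pdf_path: str, vlp_output_path: str, response_path: str) -> str:
    """Assemble a table_image_inference request message.

    A sketch: only the fields shown in the example above are included.
    """
    message = {
        "targetFilePath": {"pdf": pdf_path, "vlp_output": vlp_output_path},
        "responseFilePath": response_path,
        "operation": "table_image_inference",
    }
    return json.dumps(message)

# Example with hypothetical paths:
payload = build_request("/data/in.pdf", "/data/vlp.json", "/data/out.json")
```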
## Installation
```bash
@@ -83,9 +31,10 @@ The below snippet shows how to find the outlines of previous redactions.
```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import numpy as np
pdf_path = ...
page_index = ...

bamboo-specs/pom.xml Normal file
@@ -0,0 +1,40 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-parent</artifactId>
<version>7.1.2</version>
<relativePath/>
</parent>
<artifactId>bamboo-specs</artifactId>
<version>1.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<sonar.skip>true</sonar.skip>
</properties>
<dependencies>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-api</artifactId>
</dependency>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs</artifactId>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<!-- run 'mvn test' to perform offline validation of the plan -->
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
</project>

@@ -0,0 +1,178 @@
package buildjob;
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
import java.time.LocalTime;
import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
import com.atlassian.bamboo.specs.api.builders.plan.Job;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
import com.atlassian.bamboo.specs.api.builders.project.Project;
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
import com.atlassian.bamboo.specs.builders.task.CleanWorkingDirectoryTask;
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
import com.atlassian.bamboo.specs.builders.trigger.ScheduledTrigger;
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
import com.atlassian.bamboo.specs.api.builders.Variable;
import com.atlassian.bamboo.specs.util.BambooServer;
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
/**
* Plan configuration for Bamboo.
* Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
*/
@BambooSpec
public class PlanSpec {
private static final String SERVICE_NAME = "cv-analysis";
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
/**
* Run main to publish plan on Bamboo
*/
public static void main(final String[] args) throws Exception {
//By default credentials are read from the '.credentials' file.
BambooServer bambooServer = new BambooServer("http://localhost:8085");
Plan plan = new PlanSpec().createDockerBuildPlan();
bambooServer.publish(plan);
PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
bambooServer.publish(planPermission);
Plan secPlan = new PlanSpec().createSecBuild();
bambooServer.publish(secPlan);
PlanPermissions secPlanPermission = new PlanSpec().createPlanPermission(secPlan.getIdentifier());
bambooServer.publish(secPlanPermission);
}
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
Permissions permission = new Permissions()
.userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("research", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("QA", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.loggedInUserPermissions(PermissionType.VIEW)
.anonymousUserPermissionView();
return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
}
private Project project() {
return new Project()
.name("RED")
.key(new BambooKey("RED"));
}
public Plan createDockerBuildPlan() {
return new Plan(
project(),
SERVICE_NAME, new BambooKey(SERVICE_KEY))
// .description("Docker build for cv-analysis.")
// .variables()
.stages(new Stage("Build Stage")
.jobs(
new Job("Build Job", new BambooKey("BUILD"))
.tasks(
new CleanWorkingDirectoryTask()
.description("Clean working directory.")
.enabled(true),
new VcsCheckoutTask()
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask()
.description("Set config and keys.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
new ScriptTask()
.description("Build Docker container.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/docker-build.sh")
.argument(SERVICE_NAME),
new InjectVariablesTask()
.description("Inject git tag.")
.path("git.tag")
.namespace("g")
.scope(InjectVariablesScope.LOCAL),
new VcsTagTask()
.description("${bamboo.g.gitTag}")
.tagName("${bamboo.g.gitTag}")
.defaultRepository())
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/release_build:4.5.0")
.volume("/var/run/docker.sock", "/var/run/docker.sock")),
new Job("Licence Job", new BambooKey("LICENCE"))
.enabled(false)
.tasks(
new VcsCheckoutTask()
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask()
.description("Build licence.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/create-licence.sh"))
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RR / " + SERVICE_NAME)
.triggers(
new BitbucketServerTrigger())
.planBranchManagement(
new PlanBranchManagement()
.createForVcsBranch()
.delete(
new BranchCleanup()
.whenInactiveInRepositoryAfterDays(14))
.notificationForCommitters());
}
public Plan createSecBuild() {
return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
.stages(new Stage("Default Stage").jobs(
new Job("Sonar Job", new BambooKey("SONAR"))
.tasks(
new CleanWorkingDirectoryTask()
.description("Clean working directory.")
.enabled(true),
new VcsCheckoutTask()
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask()
.description("Set config and keys.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
new ScriptTask()
.description("Run Sonarqube scan.")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-scan.sh")
.argument(SERVICE_NAME))
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/release_build:4.2.0")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RR / " + SERVICE_NAME)
.triggers(
new ScheduledTrigger()
.scheduleOnceDaily(LocalTime.of(23, 00)))
.planBranchManagement(
new PlanBranchManagement()
.createForVcsBranchMatching("release.*")
.notificationForCommitters());
}
}

@@ -0,0 +1,19 @@
#!/bin/bash
set -e
if [[ \"${bamboo_version_tag}\" != \"dev\" ]]
then
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
-f ${bamboo_build_working_directory}/pom.xml \
versions:set \
-DnewVersion=${bamboo_version_tag}
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
-f ${bamboo_build_working_directory}/pom.xml \
-B clean deploy \
-e -DdeployAtEnd=true \
-Dmaven.wagon.http.ssl.insecure=true \
-Dmaven.wagon.http.ssl.allowall=true \
-Dmaven.wagon.http.ssl.ignore.validity.dates=true \
-DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
fi

@@ -0,0 +1,53 @@
#!/bin/bash
set -e
SERVICE_NAME=$1
if [[ "$bamboo_planRepository_branchName" == "master" ]]
then
branchVersion=$(cat version.yaml | grep -Eo "version: .*" | sed -s 's|version: \(.*\)\..*\..*|\1|g')
latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
newVersion="$(semver $latestVersion -p -i minor)"
echo "new release on master with version $newVersion"
elif [[ "$bamboo_planRepository_branchName" == release* ]]
then
branchVersion=$(echo $bamboo_planRepository_branchName | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
newVersion="$(semver $latestVersion -p -i patch)"
echo "new release on $bamboo_planRepository_branchName with version $newVersion"
elif [[ "${bamboo_version_tag}" != "dev" ]]
then
newVersion="${bamboo_version_tag}"
echo "new special version bild with $newVersion"
else
newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
echo "gitTag=${newVersion}" > git.tag
echo "dev build with tag ${newVersion}"
python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
pip install dvc
pip install 'dvc[ssh]'
dvc pull
echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker build -f Dockerfile .
exit 0
fi
echo "gitTag=${newVersion}" > git.tag
python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
pip install dvc
pip install 'dvc[ssh]'
dvc pull
echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}
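The `sed` expression above derives the version prefix from a release branch name. A quick re-implementation in Python (for illustration only, not part of the build script) makes the intended mapping explicit:

```python
import re

def branch_to_version(branch: str) -> str:
    """Mirror of the sed call above: 'release/1.2.x' -> '1.2'."""
    return re.sub(r"release/(\d+\.\d+)\.x", r"\1", branch)

print(branch_to_version("release/1.2.x"))  # -> 1.2
```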

@@ -0,0 +1,8 @@
#!/bin/bash
set -e
mkdir -p ~/.ssh
echo "${bamboo_agent_ssh}" | base64 -d >> ~/.ssh/id_rsa
echo "host vector.iqser.com" > ~/.ssh/config
echo " user bamboo-agent" >> ~/.ssh/config
chmod 600 ~/.ssh/config ~/.ssh/id_rsa

@@ -0,0 +1,67 @@
#!/bin/bash
set -e
export JAVA_HOME=/usr/bin/sonar-scanner/jre
python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
echo "dev setup for unit test and coverage"
pip install -e incl/pyinfra
pip install -r incl/pyinfra/requirements.txt
pip install -e incl/pdf2image
pip install -r incl/pdf2image/requirements.txt
pip install -e .
pip install -r requirements.txt
echo "DVC pull step"
dvc pull
echo "coverage calculation"
coverage run -m pytest
echo "coverage report generation"
coverage report -m
coverage xml
SERVICE_NAME=$1
echo "dependency-check:aggregate"
mkdir -p reports
dependency-check --enableExperimental -f JSON -f HTML -f XML \
--disableAssembly -s . -o reports --project $SERVICE_NAME --exclude ".git/**" --exclude "venv/**" \
--exclude "build_venv/**" --exclude "**/__pycache__/**"
if [[ -z "${bamboo_repository_pr_key}" ]]
then
echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
/usr/bin/sonar-scanner/bin/sonar-scanner -X\
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=src,cv_analysis \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.branch.name=${bamboo_planRepository_1_branch} \
-Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
-Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
-Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
-Dsonar.python.coverage.reportPaths=reports/coverage.xml
else
echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
/usr/bin/sonar-scanner/bin/sonar-scanner \
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=src,cv_analysis \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
-Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
-Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
-Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
-Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
-Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
-Dsonar.python.coverage.reportPaths=reports/coverage.xml
fi

@@ -0,0 +1,22 @@
package buildjob;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
import org.junit.Test;
public class PlanSpecTest {
@Test
public void checkYourPlanOffline() throws PropertiesValidationException {
Plan plan = new PlanSpec().createDockerBuildPlan();
EntityPropertiesBuilders.build(plan);
}
@Test
public void checkYourSecPlanOffline() throws PropertiesValidationException {
Plan secPlan = new PlanSpec().createSecBuild();
EntityPropertiesBuilders.build(secPlan);
}
}

bom.json
File diff suppressed because it is too large.
@@ -1,67 +0,0 @@
[asyncio]
max_concurrent_tasks = 10
[dynamic_tenant_queues]
enabled = true
[metrics.prometheus]
enabled = true
prefix = "redactmanager_cv_analysis_service"
[tracing]
enabled = true
# possible values: "opentelemetry" | "azure_monitor" (expects the APPLICATIONINSIGHTS_CONNECTION_STRING environment variable)
type = "azure_monitor"
[tracing.opentelemetry]
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
service_name = "redactmanager_cv_analysis_service"
exporter = "otlp"
[webserver]
host = "0.0.0.0"
port = 8080
[rabbitmq]
host = "localhost"
port = 5672
username = ""
password = ""
heartbeat = 60
# Must be a divisor of heartbeat, and should not be too large, since queue interactions (such as receiving new messages) only happen at these intervals.
# This is also the minimum time the service needs to process a message.
connection_sleep = 5
input_queue = "request_queue"
output_queue = "response_queue"
dead_letter_queue = "dead_letter_queue"
tenant_event_queue_suffix = "_tenant_event_queue"
tenant_event_dlq_suffix = "_tenant_events_dlq"
tenant_exchange_name = "tenants-exchange"
queue_expiration_time = 300000 # 5 minutes in milliseconds
service_request_queue_prefix = "cv_analysis_request_queue"
service_request_exchange_name = "cv_analysis_request_exchange"
service_response_exchange_name = "cv_analysis_response_exchange"
service_dlq_name = "cv_analysis_dlq"
[storage]
backend = "s3"
[storage.s3]
bucket = "redaction"
endpoint = "http://127.0.0.1:9000"
key = ""
secret = ""
region = "eu-central-1"
[storage.azure]
container = "redaction"
connection_string = ""
[storage.tenant_server]
public_key = ""
endpoint = "http://tenant-user-management:8081/internal-api/tenants"
[kubernetes]
pod_name = "test_pod"

@@ -1,19 +0,0 @@
[logging]
level = "INFO"
visual_logging_level = "DISABLED"
visual_logging_output_folder = "/tmp/debug"
[table_parsing]
skip_pages_without_images = true
[paths]
root = "@format {env[ROOT_PATH]}"
dvc_data_dir = "${paths.root}/data"
pdf_for_testing = "${paths.dvc_data_dir}/pdfs_for_testing"
png_for_testing = "${paths.dvc_data_dir}/pngs_for_testing"
png_figures_detected = "${paths.png_for_testing}/figures_detected"
png_tables_detected = "${paths.png_for_testing}/tables_detected_by_tp"
hashed_pdfs_for_testing = "${paths.pdf_for_testing}/hashed"
metadata_test_files = "${paths.dvc_data_dir}/metadata_testing_files.csv"
test_dir = "${paths.dvc_data_dir}/test"
test_data_dir = "${paths.dvc_data_dir}/test/test_data"

cv_analysis/config.py Normal file
@@ -0,0 +1,31 @@
import os
def get_config():
return Config()
class Config:
def __init__(self):
self.logging_level_root = os.environ.get("LOGGING_LEVEL_ROOT", "INFO")
self.table_parsing_skip_pages_without_images = os.environ.get("TABLE_PARSING_SKIP_PAGES_WITHOUT_IMAGES", "true").lower() == "true"
# visual_logging_level: NOTHING > INFO > DEBUG > ALL
self.visual_logging_level = "DISABLED"
self.visual_logging_output_folder = "/tmp/debug"
# locations
# FIXME: is everything here necessary?
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
self.dvc_data_dir = os.path.join(root, "data")
self.pdf_for_testing = os.path.join(self.dvc_data_dir, "pdfs_for_testing")
self.png_for_testing = os.path.join(self.dvc_data_dir, "pngs_for_testing")
self.png_figures_detected = os.path.join(self.png_for_testing, "figures_detected")
self.png_tables_detected = os.path.join(self.png_for_testing, "tables_detected_by_tp")
self.hashed_pdfs_for_testing = os.path.join(self.pdf_for_testing, "hashed")
self.metadata_test_files = os.path.join(self.dvc_data_dir, "metadata_testing_files.csv")
self.test_dir = os.path.join(root, "test")
self.test_data_dir = os.path.join(self.test_dir, "test_data")
def __getitem__(self, key):
return self.__getattribute__(key)
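Because `Config.__getitem__` delegates to attribute lookup, the object supports both attribute and key access interchangeably. A minimal, self-contained sketch of that pattern:

```python
class DictLikeConfig:
    """Sketch of the pattern used by Config above: attribute and
    subscript access return the same value."""

    def __init__(self):
        self.logging_level_root = "INFO"

    def __getitem__(self, key):
        # Delegate key lookup to regular attribute lookup.
        return self.__getattribute__(key)

cfg = DictLikeConfig()
assert cfg.logging_level_root == cfg["logging_level_root"]
```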

@@ -1,40 +1,38 @@
from functools import partial
import cv2
import numpy as np
from funcy import lmap
from cv_analysis.figure_detection.figures import detect_large_coherent_structures
from cv_analysis.figure_detection.text import remove_primary_text_regions
from cv_analysis.utils.conversion import contour_to_rectangle
from cv_analysis.utils.filters import (
has_acceptable_format,
is_large_enough,
is_not_too_large,
has_acceptable_format,
is_small_enough,
)
from cv_analysis.utils.postprocessing import remove_included
from cv_analysis.utils.structures import Rectangle
def detect_figures(image: np.ndarray):
def detect_figures(image: np.array):
max_area = image.shape[0] * image.shape[1] * 0.99
min_area = 5000
max_width_to_height_ratio = 6
figure_filter = partial(is_likely_figure, min_area, max_area, max_width_to_height_ratio)
image = remove_primary_text_regions(image)
cnts = detect_large_coherent_structures(image)
cnts = filter(figure_filter, cnts)
contours = detect_large_coherent_structures(image)
contours = filter(figure_filter, contours)
# rects = map(compose(Rectangle.from_xywh, cv2.boundingRect), (cnts))
rectangles = lmap(contour_to_rectangle, contours)
rectangles = remove_included(rectangles)
bounding_rects = map(cv2.boundingRect, cnts)
rects: list[Rectangle] = remove_included(map(Rectangle.from_xywh, rects))
return rects
return rectangles
def is_likely_figure(min_area, max_area, max_width_to_height_ratio, cnts):
def is_likely_figure(min_area, max_area, max_width_to_height_ratio, contours):
return (
is_not_too_large(cnts, max_area)
and is_large_enough(cnts, min_area)
and has_acceptable_format(cnts, max_width_to_height_ratio)
is_small_enough(contours, max_area)
and is_large_enough(contours, min_area)
and has_acceptable_format(contours, max_width_to_height_ratio)
)

@@ -1,25 +1,33 @@
import cv2
import numpy as np
from cv_analysis.utils.common import find_contours_and_hierarchies
def detect_large_coherent_structures(image: np.ndarray):
"""Detects large coherent structures on an image.
def detect_large_coherent_structures(image: np.array):
"""Detects large coherent structures in an image.
Expects an image with binary color space (e.g. threshold applied).
Args:
image (np.array): Image to look for large coherent structures in.
Returns:
contours
list: List of contours.
References:
https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
"""
assert len(image.shape) == 2
# FIXME: Parameterize via factory
dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5))
# FIXME: Parameterize via factory
dilate = cv2.dilate(image, dilate_kernel, iterations=4)
# FIXME: Parameterize via factory
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)
# FIXME: Parameterize via factory
close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1) # TODO: Tweak iterations
cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours, _ = find_contours_and_hierarchies(close)
return cnts
return contours

@@ -1,5 +1,7 @@
import cv2
from cv_analysis.utils.common import normalize_to_gray_scale
def remove_primary_text_regions(image):
"""Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs.
@@ -35,6 +37,7 @@ def remove_primary_text_regions(image):
def apply_threshold_to_image(image):
"""Converts an image to black and white."""
image = normalize_to_gray_scale(image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
return cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

@@ -0,0 +1,80 @@
from functools import partial
from typing import Iterable, List
import cv2
import numpy as np
from funcy import compose, rcompose, lkeep
from cv_analysis.utils import lstarkeep
from cv_analysis.utils.common import (
find_contours_and_hierarchies,
dilate_page_components,
normalize_to_gray_scale,
threshold_image,
invert_image,
fill_rectangles,
)
from cv_analysis.utils.conversion import contour_to_rectangle
from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.postprocessing import remove_included, has_no_parent
from cv_analysis.utils.rectangle import Rectangle
def parse_layout(image: np.array) -> List[Rectangle]:
"""Parse the layout of a page.
Args:
image: Image of the page.
Returns:
List of rectangles representing the layout of the page as identified page elements.
"""
rectangles = rcompose(
find_segments,
remove_included,
merge_related_rectangles,
remove_included,
)(image)
return rectangles
def find_segments(image: np.ndarray) -> List[Rectangle]:
"""Find segments in a page. Segments are structural elements of a page, such as text blocks, tables, etc."""
rectangles = rcompose(
prepare_for_initial_detection,
__find_segments,
partial(prepare_for_meta_detection, image.copy()),
__find_segments,
)(image)
return rectangles
def prepare_for_initial_detection(image: np.ndarray) -> np.ndarray:
return compose(dilate_page_components, normalize_to_gray_scale)(image)
def __find_segments(image: np.ndarray) -> List[Rectangle]:
def to_rectangle_if_valid(contour, hierarchy):
return contour_to_rectangle(contour) if is_likely_segment(contour) and has_no_parent(hierarchy) else None
rectangles = lstarkeep(to_rectangle_if_valid, zip(*find_contours_and_hierarchies(image)))
return rectangles
def prepare_for_meta_detection(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
image = rcompose(
fill_rectangles,
threshold_image,
invert_image,
normalize_to_gray_scale,
)(image, rectangles)
return image
def is_likely_segment(contour: np.ndarray, min_area: float = 100) -> bool:
# FIXME: Parameterize via factory
return cv2.contourArea(contour, False) > min_area
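`parse_layout` relies on funcy's `rcompose`, which chains functions left to right (the reverse of `compose`), so the detection steps run in the order they are listed. A dependency-free sketch of that ordering:

```python
from functools import reduce

def rcompose(*fns):
    """Left-to-right composition: rcompose(f, g)(x) == g(f(x)),
    the ordering parse_layout depends on."""
    return lambda x: reduce(lambda acc, fn: fn(acc), fns, x)

# First add one, then double: (3 + 1) * 2
pipeline = rcompose(lambda x: x + 1, lambda x: x * 2)
print(pipeline(3))  # -> 8
```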

cv_analysis/locations.py Normal file
@@ -0,0 +1,14 @@
"""Defines constant paths relative to a root path."""
from pathlib import Path
MODULE_PATH = Path(__file__).resolve().parents[0]
PACKAGE_ROOT_PATH = MODULE_PATH.parents[0]
REPO_ROOT_PATH = PACKAGE_ROOT_PATH
TEST_DIR_PATH = REPO_ROOT_PATH / "test"
TEST_DATA_DIR = TEST_DIR_PATH / "data"
TEST_DATA_DIR_DVC = TEST_DIR_PATH / "data.dvc"
TEST_DATA_SYNTHESIS_DIR = TEST_DATA_DIR / "synthesis"
TEST_PAGE_TEXTURES_DIR = TEST_DATA_SYNTHESIS_DIR / "paper"
TEST_SMILES_FILE = TEST_DATA_SYNTHESIS_DIR / "smiles.csv"

cv_analysis/logging.py Normal file
@@ -0,0 +1,84 @@
import sys
from functools import wraps
from operator import attrgetter
from typing import Callable, Any
import loguru
from funcy import log_calls, log_enters, log_exits
logger = loguru.logger
logger.remove()
debug_logger = loguru.logger
debug_logger.add(
sink=sys.stderr,
format="<blue>{time:YYYY-MM-DD at HH:mm:ss}</blue> | <level>{level: <8}</level> | <cyan>{name}</cyan>: <level>{message}</level>",
level="DEBUG",
)
dev_logger = loguru.logger
dev_logger.add(
sink=sys.stderr,
format="<green>{time:YYYY-MM-DD at HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>: <level>{message}</level>",
level="DEBUG",
)
prod_logger = loguru.logger
prod_logger.add(
sink=sys.stderr,
format="<white>{time:YYYY-MM-DD at HH:mm:ss}</white> | <level>{level: <8}</level> | <cyan>{name}</cyan>: <level>{message}</level>",
level="INFO",
enqueue=True,
)
# logger.remove()
# logger.add(sink=sys.stderr, level="DEBUG", enqueue=True)
def __log(logger, level: str, enters=True, exits=True) -> Callable:
print_func = get_print_func(logger, level)
def dec():
if enters and exits:
fn = log_calls
elif enters:
fn = log_enters
elif exits:
fn = log_exits
else:
raise ValueError("Must log either enters or exits")
return fn(print_func=print_func)
def inner(func: Callable) -> Callable:
@dec()
@wraps(func)
def inner(*args, **kwargs) -> Any:
return func(*args, **kwargs)
return inner
return inner
def get_print_func(logger, level: str):
return attrgetter(level.lower())(logger)
def debug_log(level: str = "TRACE", enters=True, exits=True) -> Callable:
return __log(debug_logger, level, enters=enters, exits=exits)
def dev_log(level: str = "TRACE", enters=True, exits=True) -> Callable:
return __log(dev_logger, level, enters=enters, exits=exits)
def prod_log(level: str = "TRACE", enters=True, exits=True) -> Callable:
return __log(prod_logger, level, enters=enters, exits=exits)
def delay(fn, *args, **kwargs):
def inner():
return fn(*args, **kwargs)
return inner
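The `__log` helper above wraps funcy's enter/exit logging decorators around a function. Stripped of loguru and funcy, the core wrapping idea looks roughly like this (a sketch, not the module's API; `log_boundaries` is a hypothetical name):

```python
from functools import wraps

def log_boundaries(print_func, enters=True, exits=True):
    """Decorator factory: report function entry and/or exit via print_func."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if enters:
                print_func(f"enter {func.__name__}")
            result = func(*args, **kwargs)
            if exits:
                print_func(f"exit {func.__name__}")
            return result
        return wrapper
    return decorator

events = []

@log_boundaries(events.append)
def add(a, b):
    return a + b

add(1, 2)  # events is now ["enter add", "exit add"]
```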

@@ -2,17 +2,17 @@ from functools import partial
import cv2
import numpy as np
from iteration_utilities import first, starfilter # type: ignore
from iteration_utilities import starfilter, first
from cv_analysis.utils.filters import is_boxy, is_filled, is_large_enough
from cv_analysis.utils.visual_logging import vizlogger
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
from cv_analysis.utils.visual_logger import vizlogger
def is_likely_redaction(contour, hierarchy, min_area):
return is_filled(hierarchy) and is_boxy(contour) and is_large_enough(contour, min_area)
def find_redactions(image: np.ndarray, min_normalized_area=200000):
def find_redactions(image: np.array, min_normalized_area=200000):
vizlogger.debug(image, "redactions01_start.png")
min_normalized_area /= 200 # Assumes 200 DPI PDF -> image conversion resolution
@@ -29,14 +29,13 @@ def find_redactions(image: np.ndarray, min_normalized_area=200000):
contours, hierarchies = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
try:
return list(
map(
first,
starfilter(
partial(is_likely_redaction, min_area=min_normalized_area),
zip(contours, hierarchies[0]),
),
)
contours = map(
first,
starfilter(
partial(is_likely_redaction, min_area=min_normalized_area),
zip(contours, hierarchies[0]),
),
)
return list(contours)
except:
return []

@@ -0,0 +1,60 @@
from dataclasses import asdict
from operator import truth
from funcy import lmap, flatten
from cv_analysis.figure_detection.figure_detection import detect_figures
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.rectangle import Rectangle
from pdf2img.conversion import convert_pages_to_images
from pdf2img.default_objects.image import ImagePlus, ImageInfo
from pdf2img.default_objects.rectangle import RectanglePlus
def make_analysis_pipeline_for_element_type(segment_type, **kwargs):
if segment_type == "table":
return make_analysis_pipeline(parse_tables, table_parsing_formatter, dpi=200, **kwargs)
elif segment_type == "figure":
return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200, **kwargs)
else:
raise ValueError(f"Unknown segment type {segment_type}.")
def make_analysis_pipeline(analysis_fn, formatter, dpi, skip_pages_without_images=False):
def analysis_pipeline(pdf: bytes, index=None):
def parse_page(page: ImagePlus):
image = page.asarray()
rectangles = analysis_fn(image)
if not rectangles:
return
infos = formatter(rectangles, page, dpi)
return infos
pages = convert_pages_to_images(pdf, index=index, dpi=dpi, skip_pages_without_images=skip_pages_without_images)
results = map(parse_page, pages)
yield from flatten(filter(truth, results))
return analysis_pipeline
def table_parsing_formatter(rectangles, page: ImagePlus, dpi):
def format_rectangle(rectangle: Rectangle):
rectangle_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
return rectangle_plus.asdict(derotate=True)
bboxes = lmap(format_rectangle, rectangles)
return {"pageInfo": page.asdict(natural_index=True), "tableCells": bboxes}
def figure_detection_formatter(rectangles, page, dpi):
def format_rectangle(rectangle: Rectangle):
rect_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
return asdict(ImageInfo(page.info, rect_plus.asbbox(derotate=False), rect_plus.alpha))
return lmap(format_rectangle, rectangles)
def rectangle_to_xyxy(rectangle: Rectangle):
return rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y2
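`make_analysis_pipeline` above closes over an analysis function and a formatter and returns a lazy per-page generator. A simplified sketch of that shape, with hypothetical stand-ins for the analysis function and formatter and no PDF conversion:

```python
# Simplified pipeline factory: analysis_fn finds hits on a page,
# formatter turns them into a record; pages without hits are skipped.
def make_analysis_pipeline(analysis_fn, formatter):
    def analysis_pipeline(pages):
        for page in pages:
            hits = analysis_fn(page)
            if hits:
                yield formatter(hits, page)
    return analysis_pipeline

# Hypothetical stand-ins for parse_tables / table_parsing_formatter.
def find_long_words(page):
    return [word for word in page.split() if len(word) > 5]

def as_record(hits, page):
    return {"page": page[:10], "hits": hits}

pipeline = make_analysis_pipeline(find_long_words, as_record)
results = list(pipeline(["short text", "a considerably longer page"]))
```

The first page yields no hits and is silently dropped, matching the `filter(truth, results)` step in the real pipeline.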

View File

@@ -0,0 +1,129 @@
import cv2
import numpy as np
from funcy import lmap, lfilter
from cv_analysis.layout_parsing import parse_layout
from cv_analysis.utils.conversion import box_to_rectangle
from cv_analysis.utils.postprocessing import remove_isolated
from cv_analysis.utils.visual_logger import vizlogger
def add_external_contours(image, image_h_w_lines_only):
contours, _ = cv2.findContours(image_h_w_lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
cv2.rectangle(image, (x, y), (x + w, y + h), 255, 1)
return image
def apply_motion_blur(image: np.array, angle, size=80):
"""Solidifies and slightly extends detected lines.
Args:
image (np.array): page image as array
angle: direction in which to apply blur, 0 or 90
size (int): kernel size; 80 found empirically to work well
Returns:
np.ndarray
"""
k = np.zeros((size, size), dtype=np.float32)
vizlogger.debug(k, "tables08_blur_kernel1.png")
k[(size - 1) // 2, :] = np.ones(size, dtype=np.float32)
vizlogger.debug(k, "tables09_blur_kernel2.png")
k = cv2.warpAffine(
k,
cv2.getRotationMatrix2D((size / 2 - 0.5, size / 2 - 0.5), angle, 1.0),
(size, size),
)
vizlogger.debug(k, "tables10_blur_kernel3.png")
k = k * (1.0 / np.sum(k))
vizlogger.debug(k, "tables11_blur_kernel4.png")
blurred = cv2.filter2D(image, -1, k)
return blurred
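Leaving aside the `cv2.warpAffine` rotation step, the kernel construction above reduces to a single normalized row of weights. A dependency-free sketch (the 90° case would be the transposed column):

```python
# Build a size x size kernel whose middle row holds equal weights
# summing to 1; convolving with it averages pixels horizontally,
# which is what produces the motion-blur effect.
def motion_blur_kernel(size):
    kernel = [[0.0] * size for _ in range(size)]
    middle = (size - 1) // 2
    kernel[middle] = [1.0 / size] * size
    return kernel

kernel = motion_blur_kernel(4)
```

Normalizing the kernel to sum to 1 keeps overall image brightness unchanged after filtering.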
def isolate_vertical_and_horizontal_components(img_bin):
"""Identifies and reinforces horizontal and vertical lines in a binary image.
Args:
img_bin (np.array): array corresponding to single binarized page image
Returns:
np.ndarray
"""
line_min_width = 48
kernel_h = np.ones((1, line_min_width), np.uint8)
kernel_v = np.ones((line_min_width, 1), np.uint8)
img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h)
img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v)
img_lines_raw = img_bin_v | img_bin_h
kernel_h = np.ones((1, 30), np.uint8)
kernel_v = np.ones((30, 1), np.uint8)
img_bin_h = cv2.dilate(img_bin_h, kernel_h, iterations=2)
img_bin_v = cv2.dilate(img_bin_v, kernel_v, iterations=2)
img_bin_h = apply_motion_blur(img_bin_h, 0)
img_bin_v = apply_motion_blur(img_bin_v, 90)
img_bin_extended = img_bin_h | img_bin_v
th1, img_bin_extended = cv2.threshold(img_bin_extended, 120, 255, cv2.THRESH_BINARY)
img_bin_final = cv2.dilate(img_bin_extended, np.ones((1, 1), np.uint8), iterations=1)
# add contours before lines are extended by blurring
img_bin_final = add_external_contours(img_bin_final, img_lines_raw)
return img_bin_final
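The morphological opening with a 1 x n (or n x 1) kernel is what isolates line segments here: it keeps only foreground runs at least as long as the kernel. A 1-D sketch of that behavior, assuming a left-anchored erosion kernel and the matching dilation:

```python
# Left-anchored 1-D erosion: output 1 where a full k-run starts.
def erode(bits, k):
    n = len(bits)
    return [int(i + k <= n and all(bits[i:i + k])) for i in range(n)]

# Matching dilation: output 1 where any of the previous k samples is 1.
def dilate(bits, k):
    return [int(any(bits[max(0, i - k + 1):i + 1])) for i in range(len(bits))]

# Opening = erosion then dilation: runs shorter than k disappear,
# runs of at least k survive unchanged.
def opening(bits, k):
    return dilate(erode(bits, k), k)
```

With `line_min_width = 48` as above, only horizontal (or vertical) strokes at least 48 pixels long survive the opening, which is why text glyphs drop out while table rules remain.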
def find_table_layout_boxes(image: np.array):
def is_large_enough(box):
(x, y, w, h) = box
if w * h >= 100000:
return box_to_rectangle(box)
layout_boxes = parse_layout(image)
return lmap(is_large_enough, layout_boxes)
def preprocess(image: np.array):
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
_, image = cv2.threshold(image, 195, 255, cv2.THRESH_BINARY)
return ~image
def turn_connected_components_into_rectangles(image: np.array):
def is_large_enough(stat):
x1, y1, w, h, area = stat
return area > 2000 and w > 35 and h > 25
_, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
stats = lfilter(is_large_enough, stats)
if stats:
stats = np.vstack(stats)
return stats[:, :-1][2:]
return []
def parse_tables(image: np.array):
"""Runs the full table parsing process.
Args:
image (np.array): single PDF page, converted to a numpy array
Returns:
list: list of rectangles corresponding to table cells
"""
image = preprocess(image)
image = isolate_vertical_and_horizontal_components(image)
boxes = turn_connected_components_into_rectangles(image)
rectangles = lmap(box_to_rectangle, boxes)
rectangles = remove_isolated(rectangles)
return rectangles

View File

@@ -1,13 +1,13 @@
def make_art():
art = r"""
__
_ |@@|
/ \ \--/ __ .__ .__
) O|----| | __ ___ __ _____ ____ _____ | | ___.__. _____|__| ______
/ / \ }{ /\ )_ / _\\ \/ / ______ \__ \ / \\__ \ | | | | |/ ___/ |/ ___/
)/ /\__/\ \__O (__ \ / /_____/ / __ \| | \/ __ \| |_\___ |\___ \| |\___ \
|/ (--/\--) \__/ \_/ (______/___|__(______/____/\____/_____/|__/_____/
/ _)( )(_
`---''---`
"""
return art

View File

@@ -0,0 +1,51 @@
from functools import reduce
from typing import Iterable
import cv2
import numpy as np
from funcy import first
from cv_analysis.utils.rectangle import Rectangle
def find_contours_and_hierarchies(image):
contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
return contours, first(hierarchies) if hierarchies is not None else None
def dilate_page_components(image: np.ndarray) -> np.ndarray:
# FIXME: Parameterize via factory
image = cv2.GaussianBlur(image, (7, 7), 0)
# FIXME: Parameterize via factory
thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# FIXME: Parameterize via factory
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
# FIXME: Parameterize via factory
dilate = cv2.dilate(thresh, kernel, iterations=4)
return dilate
def normalize_to_gray_scale(image: np.ndarray) -> np.ndarray:
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
return image
def threshold_image(image: np.ndarray) -> np.ndarray:
# FIXME: Parameterize via factory
_, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
return image
def invert_image(image: np.ndarray):
return ~image
def fill_rectangles(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
image = reduce(fill_in_component_area, rectangles, image)
return image
def fill_in_component_area(image: np.ndarray, rectangle: Rectangle) -> np.ndarray:
cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (0, 0, 0), -1)
cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (255, 255, 255), 7)
return image

View File

@@ -0,0 +1,47 @@
import json
from typing import Sequence, Union
import cv2
import numpy as np
from PIL import Image
from cv_analysis.utils.rectangle import Rectangle
Image_t = Union[Image.Image, np.ndarray]
def contour_to_rectangle(contour):
return box_to_rectangle(cv2.boundingRect(contour))
def box_to_rectangle(box: Sequence[int]) -> Rectangle:
x, y, w, h = box
return Rectangle(x, y, x + w, y + h)
def rectangle_to_box(rectangle: Rectangle) -> Sequence[int]:
return [rectangle.x1, rectangle.y1, rectangle.width, rectangle.height]
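The two converters above translate between OpenCV's `(x, y, w, h)` boxes and corner-coordinate rectangles, and they are inverses of each other. A sketch with a namedtuple standing in for the project's `Rectangle`:

```python
from collections import namedtuple

# Minimal stand-in for the project's Rectangle, with just the fields
# the converters use.
Rect = namedtuple("Rect", "x1 y1 x2 y2")

def box_to_rectangle(box):
    x, y, w, h = box
    return Rect(x, y, x + w, y + h)

def rectangle_to_box(rect):
    return [rect.x1, rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1]

box = [10, 20, 30, 40]  # x, y, width, height
rect = box_to_rectangle(box)
```

Round-tripping through both converters returns the original box, which is what makes them safe to mix at module boundaries.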
class RectangleJSONEncoder(json.JSONEncoder):
def __init__(self, *args, **kwargs):
json.JSONEncoder.__init__(self, *args, **kwargs)
self._replacement_map = {}
def default(self, o):
if isinstance(o, Rectangle):
return {"x1": o.x1, "x2": o.x2, "y1": o.y1, "y2": o.y2}
else:
return json.JSONEncoder.default(self, o)
def encode(self, o):
result = json.JSONEncoder.encode(self, o)
return result
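The custom encoder hooks `default`, which `json.dumps` calls only for objects it cannot serialize natively, so plain ints and strings still encode as usual. A self-contained sketch of the same pattern with a minimal stand-in class:

```python
import json

class Rect:
    """Minimal stand-in for the project's Rectangle."""
    def __init__(self, x1, y1, x2, y2):
        self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2

class RectJSONEncoder(json.JSONEncoder):
    # Called by json.dumps for objects the default encoder rejects;
    # rectangles become a plain coordinate dict.
    def default(self, o):
        if isinstance(o, Rect):
            return {"x1": o.x1, "x2": o.x2, "y1": o.y1, "y2": o.y2}
        return json.JSONEncoder.default(self, o)

payload = json.dumps({"cell": Rect(1, 2, 3, 4)}, cls=RectJSONEncoder, sort_keys=True)
```

Passing the encoder via `cls=` keeps call sites free of manual conversion before serialization.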
def normalize_image_format_to_array(image: Image_t):
return np.array(image).astype(np.uint8) if isinstance(image, Image.Image) else image
def normalize_image_format_to_pil(image: Image_t):
return Image.fromarray(image.astype(np.uint8)) if isinstance(image, np.ndarray) else image

View File

@@ -0,0 +1,51 @@
import cv2
import numpy as np
from PIL import Image
from PIL.Image import Image as Image_t
from matplotlib import pyplot as plt
from cv_analysis.utils.conversion import normalize_image_format_to_array
def show_image(image, backend="mpl", **kwargs):
image = normalize_image_format_to_array(image)
if backend == "mpl":
show_image_mpl(image, **kwargs)
elif backend == "cv2":
show_image_cv2(image, **kwargs)
elif backend == "pil":
Image.fromarray(image).show()
else:
raise ValueError(f"Unknown backend: {backend}")
def show_image_cv2(image, maxdim=700, **kwargs):
h, w, c = image.shape
maxhw = max(h, w)
if maxhw > maxdim:
ratio = maxdim / maxhw
h = int(h * ratio)
w = int(w * ratio)
img = cv2.resize(image, (w, h))  # cv2.resize expects (width, height)
cv2.imshow("", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def show_image_mpl(image, **kwargs):
if isinstance(image, Image_t):
# noinspection PyTypeChecker
image = np.array(image)
# noinspection PyArgumentList
assert image.max() <= 255
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(20, 20)
assert image.dtype == np.uint8
ax.imshow(image, cmap="gray")
ax.title.set_text(kwargs.get("title", ""))
plt.show()
def save_image(image, path):
cv2.imwrite(path, image)

View File

@@ -1,18 +1,23 @@
from typing import Union
import cv2
import numpy as np
from PIL import Image
from cv_analysis.utils import copy_and_normalize_channels
def draw_contours(image, contours, color=None, annotate=False):
def draw_contours(image, contours):
image = copy_and_normalize_channels(image)
for cont in contours:
cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
for contour in contours:
cv2.drawContours(image, contour, -1, (0, 255, 0), 4)
return image
def draw_rectangles(image, rectangles, color=None, annotate=False):
def draw_rectangles(image: Union[np.ndarray, Image.Image], rectangles, color=None, annotate=False, filled=False):
def annotate_rect(x, y, w, h):
cv2.putText(
image,
@@ -20,18 +25,18 @@ def draw_rectangles(image, rectangles, color=None, annotate=False):
(x + (w // 2) - 12, y + (h // 2) + 9),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
(0, 255, 0, 255),
2,
)
image = copy_and_normalize_channels(image)
if not color:
color = (0, 255, 0)
color = (0, 255, 0, 255)
for rect in rectangles:
x, y, w, h = rect
cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
cv2.rectangle(image, (x, y), (x + w, y + h), color, -1 if filled else 1)
if annotate:
annotate_rect(x, y, w, h)

View File

@@ -5,7 +5,7 @@ def is_large_enough(cont, min_area):
return cv2.contourArea(cont, False) > min_area
def is_not_too_large(cnt, max_area):
def is_small_enough(cnt, max_area):
return cv2.contourArea(cnt, False) < max_area

View File

@@ -0,0 +1,13 @@
from cv_analysis.utils.rectangle import Rectangle
def is_square_like(box: Rectangle):
return box.width / box.height > 0.5 and box.height / box.width > 0.5
def is_wide(box: Rectangle):
return box.width / box.height > 1.5
def is_tall(box: Rectangle):
return box.height / box.width > 1.5

View File

@@ -0,0 +1,115 @@
from typing import Tuple
import cv2 as cv
import numpy as np
from PIL import ImageOps, Image
from loguru import logger
from cv_analysis.utils.conversion import normalize_image_format_to_pil
Color = Tuple[int, int, int]
def blur(image: np.ndarray):
return cv.blur(image, (3, 3))
def sharpen(image: np.ndarray):
return cv.filter2D(image, -1, np.array([[-1, -1, -1], [-1, 6, -1], [-1, -1, -1]]))
def overlay(images, mode=np.sum):
assert mode in [np.sum, np.max]
images = np.stack(list(images))
image = mode(images, axis=0)
image = (image / image.max() * 255).astype(np.uint8)
return image
def tint_image(src, color="#FFFFFF"):
src.load()
r, g, b, alpha = src.split()
gray = ImageOps.grayscale(src)
result = ImageOps.colorize(gray, (0, 0, 0), color)
result.putalpha(alpha)
return result
def color_shift_array(image: np.ndarray, color: Color):
"""Reweights the channels of a 3-channel image array by the given color tuple."""
assert image.ndim == 3
assert image.shape[-1] == 3
assert isinstance(color, tuple)
assert max(color) <= 255
assert image.max() <= 255
color = np.array(color)
weights = color / color.sum() / 10
assert max(weights) <= 1
colored = (image * weights).astype(np.uint8)
assert colored.shape == image.shape
return colored
def superimpose(
base_image: Image,
image_to_superimpose: Image,
crop_to_content=True,
pad=True,
) -> Image:
"""Superimposes an image with transparency onto another image.
Args:
base_image: The page image.
image_to_superimpose: The texture image.
crop_to_content: If True, the texture will be cropped to content (i.e. the bounding box of all non-transparent
parts of the texture image).
pad: If True, the texture will be padded to the size of the page.
Returns:
Image where the texture is superimposed onto the page.
"""
base_image = normalize_image_format_to_pil(base_image)
image_to_superimpose = normalize_image_format_to_pil(image_to_superimpose)
if crop_to_content:
image_to_superimpose = image_to_superimpose.crop(image_to_superimpose.getbbox())
if base_image.size != image_to_superimpose.size:
logger.trace(f"Size of page and texture do not match: {base_image.size} != {image_to_superimpose.size}")
if pad:
logger.trace(f"Padding texture before pasting to fit size {base_image.size}")
image_to_superimpose = pad_image_to_size(image_to_superimpose, base_image.size)
else:
logger.trace(f"Resizing texture before pasting to fit size {base_image.size}")
image_to_superimpose = image_to_superimpose.resize(base_image.size)
assert base_image.size == image_to_superimpose.size
assert image_to_superimpose.mode == "RGBA"
base_image.paste(image_to_superimpose, (0, 0), image_to_superimpose)
return base_image
def pad_image_to_size(image: Image, size: Tuple[int, int]) -> Image:
"""Pads an image to a given size."""
if image.size == size:
return image
if image.size[0] > size[0] or image.size[1] > size[1]:
raise ValueError(f"Image size {image.size} is larger than target size {size}.")
padded = Image.new(image.mode, size, color=255)
pasting_coords = compute_pasting_coordinates(image, padded)
assert image.mode == "RGBA"
padded.paste(image, pasting_coords)
return padded
def compute_pasting_coordinates(smaller: Image, larger: Image.Image):
"""Computes the coordinates for centrally pasting a smaller image onto a larger image."""
return abs(larger.width - smaller.width) // 2, abs(larger.height - smaller.height) // 2
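`compute_pasting_coordinates` is pure integer arithmetic on the two sizes, with `abs` guarding against the images being passed in the wrong order. Sketched without PIL, operating on `(width, height)` tuples:

```python
# Centering offsets for pasting a smaller image onto a larger one:
# half of the size difference along each axis (integer division).
def compute_pasting_coordinates(smaller_size, larger_size):
    (sw, sh), (lw, lh) = smaller_size, larger_size
    return abs(lw - sw) // 2, abs(lh - sh) // 2

offsets = compute_pasting_coordinates((100, 50), (300, 200))
```

Integer division means an odd size difference leaves the extra pixel on the right/bottom side.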

View File

@@ -0,0 +1,29 @@
from numpy import array, ndarray
import pdf2image
from PIL import Image
from cv_analysis.utils.preprocessing import preprocess_page_array
def open_analysis_input_file(path_or_bytes, first_page=1, last_page=None):
assert first_page > 0, "Page numbers are 1-based."
assert last_page is None or last_page >= first_page, "last_page must be greater than or equal to first_page."
last_page = last_page or first_page
if type(path_or_bytes) == str:
if path_or_bytes.lower().endswith((".png", ".jpg", ".jpeg")):
pages = [Image.open(path_or_bytes)]
elif path_or_bytes.lower().endswith(".pdf"):
pages = pdf2image.convert_from_path(path_or_bytes, first_page=first_page, last_page=last_page)
else:
raise IOError("Invalid file extension. Accepted filetypes: .png, .jpg, .jpeg, .pdf")
elif type(path_or_bytes) == bytes:
pages = pdf2image.convert_from_bytes(path_or_bytes, first_page=first_page, last_page=last_page)
elif type(path_or_bytes) in {list, ndarray}:
return path_or_bytes
pages = [preprocess_page_array(array(p)) for p in pages]
return pages

View File

@@ -0,0 +1,54 @@
from functools import reduce
from itertools import combinations
from typing import List, Tuple, Set
from funcy import all
from cv_analysis.utils import until, make_merger_sentinel
from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import related
def merge_related_rectangles(rectangles: List[Rectangle]) -> List[Rectangle]:
"""Merges rectangles that are related to each other, iterating on partial merge results until no more mergers are
possible."""
assert isinstance(rectangles, list)
no_new_merges = make_merger_sentinel()
return until(no_new_merges, merge_rectangles_once, rectangles)
def merge_rectangles_once(rectangles: List[Rectangle]) -> List[Rectangle]:
"""Merges rectangles that are related to each other, but does not iterate on the results."""
rectangles = set(rectangles)
merged, used = reduce(merge_if_related, combinations(rectangles, 2), (set(), set()))
return list(merged | rectangles - used)
T = Tuple[Set[Rectangle], Set[Rectangle]]
V = Tuple[Rectangle, Rectangle]
def merge_if_related(merged_and_used_so_far: T, rectangle_pair: V) -> T:
"""Merges two rectangles if they are related, otherwise returns the accumulator unchanged."""
alpha, beta = rectangle_pair
merged, used = merged_and_used_so_far
def unused(*args) -> bool:
return not used & {*args}
if all(unused, (alpha, beta)) and related(alpha, beta):
return merged | {bounding_rect(alpha, beta)}, used | {alpha, beta}
else:
return merged, used
def bounding_rect(alpha: Rectangle, beta: Rectangle) -> Rectangle:
"""Returns the smallest rectangle that contains both rectangles."""
return Rectangle(
min(alpha.x1, beta.x1),
min(alpha.y1, beta.y1),
max(alpha.x2, beta.x2),
max(alpha.y2, beta.y2),
)
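`merge_related_rectangles` iterates `merge_rectangles_once` to a fixed point via `until` and a sentinel. The real `until` and `make_merger_sentinel` live in `cv_analysis.utils` and are not shown in this diff, so the sketch below assumes their semantics (the sentinel compares successive results); the single-pass step is a toy run-collapser standing in for rectangle merging:

```python
# Assumed semantics: `until` re-applies `step` until
# `done(previous, current)` reports that nothing changed.
def until(done, step, value):
    while True:
        new_value = step(value)
        if done(value, new_value):
            return new_value
        value = new_value

def make_merger_sentinel():
    # "No new merges" here means the result stopped shrinking.
    return lambda previous, current: len(previous) == len(current)

# Toy single-pass "merge" standing in for merge_rectangles_once:
# collapse adjacent duplicates.
def collapse_once(items):
    out = []
    for item in items:
        if not out or out[-1] != item:
            out.append(item)
    return out

merged = until(make_merger_sentinel(), collapse_once, [1, 1, 2, 2, 2, 3])
```

Iterating to a fixed point matters for the rectangle case because one pass can create a merged rectangle that is itself related to a third rectangle.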

View File

@@ -0,0 +1,56 @@
from functools import reduce
from operator import itemgetter
from typing import Iterable
import numpy as np
from funcy import lmap, lpluck, first
from cv_analysis.utils import lift
from cv_analysis.utils.rectangle import Rectangle
def compute_document_score(result_dict, ground_truth_dicts):
extract_cells = lambda dicts: lpluck("cells", dicts["pages"])
cells_per_ground_truth_page, cells_per_result_page = map(extract_cells, (ground_truth_dicts, result_dict))
cells_on_page_to_rectangles = lift(rectangle_from_dict)
cells_on_pages_to_rectangles = lift(cells_on_page_to_rectangles)
rectangles_per_ground_truth_page, rectangles_per_result_page = map(
cells_on_pages_to_rectangles, (cells_per_ground_truth_page, cells_per_result_page)
)
scores = lmap(compute_page_iou, rectangles_per_result_page, rectangles_per_ground_truth_page)
n_cells_per_page = np.array(lmap(len, cells_per_ground_truth_page))
document_score = np.average(scores, weights=n_cells_per_page / n_cells_per_page.sum())
return document_score
def rectangle_from_dict(d):
x1, y1, w, h = itemgetter("x", "y", "width", "height")(d)
return Rectangle(x1, y1, x1 + w, y1 + h)
def compute_page_iou(predicted_rectangles: Iterable[Rectangle], true_rectangles: Iterable[Rectangle]):
def find_best_iou(sum_so_far_and_candidate_rectangles, true_rectangle):
sum_so_far, predicted_rectangles = sum_so_far_and_candidate_rectangles
best_match, best_iou = find_max_overlap(true_rectangle, predicted_rectangles)
return sum_so_far + best_iou, predicted_rectangles - {best_match}
predicted_rectangles = set(predicted_rectangles)
true_rectangles = set(true_rectangles)
iou_sum = first(reduce(find_best_iou, true_rectangles, (0, predicted_rectangles)))
normalizing_factor = 1 / max(len(predicted_rectangles), len(true_rectangles))
score = normalizing_factor * iou_sum
return score
def find_max_overlap(rectangle: Rectangle, candidate_rectangles: Iterable[Rectangle]):
best_candidate_rectangle = max(candidate_rectangles, key=rectangle.iou)
iou = rectangle.iou(best_candidate_rectangle)
return best_candidate_rectangle, iou
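The scoring reduce above greedily matches each ground-truth rectangle to its best-IoU prediction, removes that prediction from the candidate pool, and normalizes by the larger of the two set sizes. A pure-Python sketch of the same greedy loop, using 1-D intervals as "rectangles" so the arithmetic is checkable by hand:

```python
# Intersection over union for 1-D intervals (a1, a2), (b1, b2).
def iou(a, b):
    (a1, a2), (b1, b2) = a, b
    inter = max(0, min(a2, b2) - max(a1, b1))
    union = (a2 - a1) + (b2 - b1) - inter
    return inter / union

def page_score(predicted, true):
    candidates = set(predicted)
    total = 0.0
    for t in true:
        best = max(candidates, key=lambda p: iou(t, p))
        total += iou(t, best)
        candidates -= {best}  # each prediction matches at most once
    # Normalize by the larger set so extra or missing predictions
    # both lower the score.
    return total / max(len(predicted), len(true))

score = page_score([(0, 10), (20, 30)], [(0, 10), (20, 25)])
```

Here the first pair matches perfectly (IoU 1.0) and the second overlaps half its union (IoU 0.5), giving a page score of 0.75.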

View File

@@ -0,0 +1,38 @@
from typing import Tuple
from PIL import Image
from loguru import logger
from cv_analysis.utils.image_operations import compute_pasting_coordinates
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.content_rectangle import ContentRectangle
def shrink_rectangle(rectangle: Rectangle, factor: float) -> Rectangle:
x1, y1, x2, y2 = compute_scaled_coordinates(rectangle, (1 - factor))
logger.trace(f"Shrinking {rectangle} by {factor} to ({x1}, {y1}, {x2}, {y2}).")
assert x1 >= rectangle.x1
assert y1 >= rectangle.y1
assert x2 <= rectangle.x2
assert y2 <= rectangle.y2
shrunk_rectangle = Rectangle(x1, y1, x2, y2)
if isinstance(rectangle, ContentRectangle): # TODO: Refactor
shrunk_rectangle = ContentRectangle(*shrunk_rectangle.coords, rectangle.content)
return shrunk_rectangle
def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int, int, int, int]:
# FIXME: Refactor: Using image to compute coordinates is not clean
image = Image.new("RGBA", (rectangle.width, rectangle.height))
scaled = image.resize((int(rectangle.width * factor), int(rectangle.height * factor)))
x1, y1 = compute_pasting_coordinates(scaled, image)
x1 = rectangle.x1 + x1
y1 = rectangle.y1 + y1
x2, y2 = x1 + scaled.width, y1 + scaled.height
return x1, y1, x2, y2

View File

@@ -1,9 +1,10 @@
from collections import namedtuple
from functools import partial
from itertools import compress, starmap
from typing import Iterable, List
from itertools import starmap, compress
from typing import Iterable, List, Sequence
from cv_analysis.utils.structures import Rectangle
from funcy import lremove
from cv_analysis.utils.rectangle import Rectangle
def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
@@ -18,15 +19,28 @@ def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def remove_included(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
return keep
rectangles_to_keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
return rectangles_to_keep
def remove_small(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]:
min_width = page_width * min_percentage
min_height = page_height * min_percentage
def small(box: Rectangle):
return box.width < min_width or box.height < min_height
return lremove(small, boxes)
def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def is_connected(rect: Rectangle, rectangles: Iterable[Rectangle]):
return any(rect.adjacent(rect2) for rect2 in rectangles if not rect == rect2)
rectangles = list(filter(partial(is_connected, rectangles=list(rectangles)), rectangles))
if not isinstance(rectangles, list):
rectangles = list(rectangles)
rectangles = list(filter(partial(is_connected, rectangles=rectangles), rectangles))
return rectangles
@@ -43,9 +57,9 @@ def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]
return rectangles
def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> List[Rectangle]:
def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted: bool = True) -> List[Rectangle]:
return (__remove_isolated_unsorted if input_unsorted else __remove_isolated_sorted)(rectangles)
def has_no_parent(hierarchy):
def has_no_parent(hierarchy: Sequence[int]) -> bool:
return hierarchy[-1] <= 0

View File

@@ -1,5 +1,5 @@
import cv2
from numpy import frombuffer, ndarray
import cv2
def preprocess_page_array(page):
@@ -10,6 +10,7 @@ def preprocess_page_array(page):
def page2image(page):
if type(page) == bytes:
page = frombuffer(page)
elif type(page) == ndarray:

View File

@@ -0,0 +1,99 @@
# See https://stackoverflow.com/a/33533514
from __future__ import annotations
from typing import Iterable, Union
from funcy import identity
from cv_analysis.utils.spacial import adjacent, contains, intersection, iou, area, is_contained, shift
Coord = Union[int, float]
class Rectangle:
def __init__(self, x1, y1, x2, y2, discrete=True):
"""Creates a rectangle from two points."""
nearest_valid = int if discrete else identity
self.__x1 = nearest_valid(x1)
self.__y1 = nearest_valid(y1)
self.__x2 = nearest_valid(x2)
self.__y2 = nearest_valid(y2)
def __repr__(self):
return f"Rectangle({self.x1}, {self.y1}, {self.x2}, {self.y2})"
@property
def x1(self):
return self.__x1
@property
def x2(self):
return self.__x2
@property
def y1(self):
return self.__y1
@property
def y2(self):
return self.__y2
@property
def width(self):
return abs(self.x2 - self.x1)
@property
def height(self):
return abs(self.y2 - self.y1)
@property
def coords(self):
return [self.x1, self.y1, self.x2, self.y2]
@property
def size(self):
return self.width, self.height
def __hash__(self):
return hash((self.x1, self.y1, self.x2, self.y2))
def __iter__(self):
yield self.x1
yield self.y1
yield self.width
yield self.height
def area(self):
"""Calculates the area of this rectangle."""
return area(self)
def intersection(self, other):
"""Calculates the intersection of this and the given other rectangle."""
return intersection(self, other)
def iou(self, other: Rectangle):
"""Calculates the intersection over union of this and the given other rectangle."""
return iou(self, other)
def includes(self, other: Rectangle, tol=3):
"""Checks if this rectangle contains the given other."""
return contains(self, other, tol)
def is_included(self, rectangles: Iterable[Rectangle]):
"""Checks if this rectangle is contained by any of the given rectangles."""
return is_contained(self, rectangles)
def adjacent(self, other: Rectangle, tolerance=7):
"""Checks if this rectangle is adjacent to the given other."""
return adjacent(self, other, tolerance)
def shift(self, dx, dy):
"""Shifts this rectangle by the given amount."""
x1, y1, x2, y2 = shift(self, dx, dy)
self.__x1 = x1
self.__y1 = y1
self.__x2 = x2
self.__y2 = y2
return self
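One subtlety of the class above: `__iter__` yields `(x, y, width, height)`, not the two corner points, so a `Rectangle` unpacks like an OpenCV bounding box even though it is constructed from corners. A minimal stand-in demonstrating that convention:

```python
class Rect:
    """Minimal stand-in for the Rectangle above."""
    def __init__(self, x1, y1, x2, y2):
        self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2

    def __iter__(self):
        # Yield (x, y, width, height), matching OpenCV's box
        # convention, rather than the two corner points.
        yield self.x1
        yield self.y1
        yield abs(self.x2 - self.x1)
        yield abs(self.y2 - self.y1)

x, y, w, h = Rect(10, 20, 40, 60)
```

Code that expects corner coordinates must use the `coords` property instead of tuple unpacking.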

View File

@@ -0,0 +1,294 @@
# See https://stackoverflow.com/a/39757388
from __future__ import annotations
from functools import lru_cache
from operator import attrgetter
from typing import TYPE_CHECKING, Iterable
from funcy import juxt, rpartial, compose, lflatten, first, second
from cv_analysis.utils import lift
if TYPE_CHECKING:
from cv_analysis.utils.rectangle import Rectangle
def adjacent(alpha: Rectangle, beta: Rectangle, tolerance=7, strict=False):
"""Checks if the two rectangles are adjacent to each other.
Args:
alpha: The first rectangle.
beta: The second rectangle.
tolerance: The maximum distance between the two rectangles.
strict: If True, the rectangles must be adjacent along one axis and contained within the other axis. Else, the
rectangles must be adjacent along one axis and overlapping the other axis.
Returns:
True if the two rectangles are adjacent to each other, False otherwise.
"""
select_strictness_variant = first if strict else second
test_candidates = [
# +---+
# | | +---+
# | a | | b |
# | | +___+
# +___+
(right_left_aligned_and_vertically_contained, right_left_aligned_and_vertically_overlapping),
# +---+
# +---+ | |
# | b | | a |
# +___+ | |
# +___+
(left_right_aligned_and_vertically_contained, left_right_aligned_and_vertically_overlapping),
# +-----------+
# | a |
# +___________+
# +-----+
# | b |
# +_____+
(bottom_top_aligned_and_horizontally_contained, bottom_top_aligned_and_horizontally_overlapping),
# +-----+
# | b |
# +_____+
# +-----------+
# | a |
# +___________+
(top_bottom_aligned_and_horizontally_contained, top_bottom_aligned_and_horizontally_overlapping),
]
tests = map(select_strictness_variant, test_candidates)
return any(juxt(*tests)(alpha, beta, tolerance))
def right_left_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is left of the other within a tolerance and also overlaps the other's y range."""
return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
)
def left_right_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is right of the other within a tolerance and also overlaps the other's y range."""
return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
)
def bottom_top_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is above the other within a tolerance and also overlaps the other's x range."""
return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
)
def top_bottom_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is below the other within a tolerance and also overlaps the other's x range."""
return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
)
def right_left_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is left of the other within a tolerance and also contains the other's y range."""
return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
)
def left_right_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is right of the other within a tolerance and also contains the other's y range."""
return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
)
def bottom_top_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is above the other within a tolerance and also contains the other's x range."""
return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
)
def top_bottom_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
"""Checks if the first rectangle is below the other within a tolerance and also contains the other's x range."""
return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
)
def adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
axis_0_point_1,
axis_1_point_2,
axis_1_contained_point_1,
axis_1_contained_point_2,
axis_1_lower_bound,
axis_1_upper_bound,
tolerance,
):
"""Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
axis.
"""
return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
axis_0_point_1,
axis_1_point_2,
axis_1_contained_point_1,
axis_1_contained_point_2,
axis_1_lower_bound,
axis_1_upper_bound,
tolerance,
mode="overlapping",
)
def adjacent_along_one_axis_and_contained_within_perpendicular_axis(
axis_0_point_1,
axis_1_point_2,
axis_1_contained_point_1,
axis_1_contained_point_2,
axis_1_lower_bound,
axis_1_upper_bound,
tolerance,
):
"""Checks if two points are adjacent along one axis and two other points are contained within a range along the
perpendicular axis.
"""
return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
axis_0_point_1,
axis_1_point_2,
axis_1_contained_point_1,
axis_1_contained_point_2,
axis_1_lower_bound,
axis_1_upper_bound,
tolerance,
mode="contained",
)
def adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
    axis_0_point_1,
    axis_0_point_2,
    axis_1_contained_point_1,
    axis_1_contained_point_2,
    axis_1_lower_bound,
    axis_1_upper_bound,
    tolerance,
    mode,
):
    """Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
    axis or are contained in that range, depending on the mode specified.
    """
    assert mode in ["overlapping", "contained"]
    quantifier = any if mode == "overlapping" else all
    return all(
        [
            abs(axis_0_point_1 - axis_0_point_2) <= tolerance,
            quantifier(
                [
                    axis_1_lower_bound <= p <= axis_1_upper_bound
                    for p in [axis_1_contained_point_1, axis_1_contained_point_2]
                ]
            ),
        ]
    )
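A minimal standalone sketch of how the `mode` switch changes the perpendicular-axis test (the function below repeats the logic for a self-contained demo; the numbers are illustrative):

```python
def adjacent_and_perpendicular(point_a, point_b, p1, p2, lower, upper, tolerance, mode):
    # Adjacency within tolerance along one axis, plus an any/all quantifier
    # over the perpendicular-axis bounds check, as in the function above.
    quantifier = any if mode == "overlapping" else all
    return abs(point_a - point_b) <= tolerance and quantifier(
        lower <= p <= upper for p in (p1, p2)
    )

# beta's left edge (11) sits next to alpha's right edge (10) within a 2px
# tolerance; beta's y range [5, 15] only partially overlaps alpha's [0, 10]:
print(adjacent_and_perpendicular(10, 11, 5, 15, 0, 10, tolerance=2, mode="overlapping"))  # True
print(adjacent_and_perpendicular(10, 11, 5, 15, 0, 10, tolerance=2, mode="contained"))    # False
```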
def contains(alpha: Rectangle, beta: Rectangle, tol=3):
    """Checks if the first rectangle contains the second rectangle."""
    return (
        beta.x1 + tol >= alpha.x1
        and beta.y1 + tol >= alpha.y1
        and beta.x2 - tol <= alpha.x2
        and beta.y2 - tol <= alpha.y2
    )


def is_contained(rectangle: Rectangle, rectangles: Iterable[Rectangle]):
    """Checks if the rectangle is contained within any of the other rectangles."""
    other_rectangles = filter(lambda r: r != rectangle, rectangles)
    return any(map(rpartial(contains, rectangle), other_rectangles))
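A short sketch of the tolerance-padded containment check, assuming `Rectangle` is a simple namedtuple with `x1, y1, x2, y2` fields (the field layout is an assumption here):

```python
from collections import namedtuple

Rectangle = namedtuple("Rectangle", "x1 y1 x2 y2")  # assumed field layout

def contains(alpha, beta, tol=3):
    # beta may poke out of alpha by up to `tol` pixels on each side.
    return (
        beta.x1 + tol >= alpha.x1
        and beta.y1 + tol >= alpha.y1
        and beta.x2 - tol <= alpha.x2
        and beta.y2 - tol <= alpha.y2
    )

outer = Rectangle(0, 0, 100, 100)
inner = Rectangle(10, 10, 50, 50)
poking_out = Rectangle(10, 10, 102, 50)  # exceeds outer by 2px on the right

print(contains(outer, inner))       # True
print(contains(outer, poking_out))  # True: the 2px overhang is within tol=3
print(contains(inner, outer))       # False
```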
def iou(alpha: Rectangle, beta: Rectangle):
    """Calculates the intersection area over the union area of two rectangles."""
    return intersection(alpha, beta) / union(alpha, beta)


def area(rectangle: Rectangle):
    """Calculates the area of a rectangle."""
    return abs((rectangle.x2 - rectangle.x1) * (rectangle.y2 - rectangle.y1))


def union(alpha: Rectangle, beta: Rectangle):
    """Calculates the union area of two rectangles."""
    return area(alpha) + area(beta) - intersection(alpha, beta)
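Worked through on two axis-aligned boxes, using a hypothetical standalone reformulation of the intersection (not the module's cached `intersection`), with an assumed `x1, y1, x2, y2` namedtuple layout:

```python
from collections import namedtuple

Rectangle = namedtuple("Rectangle", "x1 y1 x2 y2")  # assumed field layout

def area(r):
    return abs((r.x2 - r.x1) * (r.y2 - r.y1))

def intersection_area(a, b):
    # Overlap width times overlap height, each clamped at zero.
    w = max(0, min(a.x2, b.x2) - max(a.x1, b.x1))
    h = max(0, min(a.y2, b.y2) - max(a.y1, b.y1))
    return w * h

a = Rectangle(0, 0, 10, 10)
b = Rectangle(5, 0, 15, 10)
inter = intersection_area(a, b)          # 5 * 10 = 50
union_area = area(a) + area(b) - inter   # 100 + 100 - 50 = 150
print(inter / union_area)                # IoU of 1/3
```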
@lru_cache(maxsize=1000)
def intersection(alpha, beta):
    """Calculates the intersection area of two rectangles."""
    return intersection_along_x_axis(alpha, beta) * intersection_along_y_axis(alpha, beta)


def intersection_along_x_axis(alpha, beta):
    """Calculates the intersection along the x-axis."""
    return intersection_along_axis(alpha, beta, "x")


def intersection_along_y_axis(alpha, beta):
    """Calculates the intersection along the y-axis."""
    return intersection_along_axis(alpha, beta, "y")
def intersection_along_axis(alpha, beta, axis):
    """Calculates the intersection along the given axis.

    Cases:
          a          b
        [-----]    (---)    ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = 0
          b          a
        (---)    [-----]    ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = 0
          a       b
        [--(----]----)      ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = (a2 - b1)
           a      b
        (-[---]----)        ==> [b1, a1, a2, b2] ==> max(0, (a2 - a1)) = (a2 - a1)
           b      a
        [-(---)----]        ==> [a1, b1, b2, a2] ==> max(0, (b2 - b1)) = (b2 - b1)
             b     a
        (----[--)----]      ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = (b2 - a1)
    """
    assert axis in ["x", "y"]

    def get_component_accessor(component):
        """Returns a function that accesses the given component of a rectangle."""
        return attrgetter(f"{axis}{component}")

    def make_access_components_and_sort_fn(component):
        """Returns a function that accesses and sorts the given component of multiple rectangles."""
        assert component in [1, 2]
        return compose(sorted, lift(get_component_accessor(component)))

    sort_first_components, sort_second_components = map(make_access_components_and_sort_fn, [1, 2])
    min_c1, max_c1, min_c2, max_c2 = lflatten(juxt(sort_first_components, sort_second_components)((alpha, beta)))
    return max(0, min_c2 - max_c1)
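The sort-based computation above reduces to the usual clamped interval overlap; a quick check of the cases in the docstring (interval endpoints here are illustrative):

```python
def interval_intersection(a1, a2, b1, b2):
    # min of the second coordinates minus max of the first coordinates,
    # clamped at zero for disjoint intervals -- same as min_c2 - max_c1 above.
    return max(0, min(a2, b2) - max(a1, b1))

print(interval_intersection(0, 5, 7, 9))  # 0 -- disjoint
print(interval_intersection(0, 5, 3, 9))  # 2 -- partial overlap
print(interval_intersection(1, 3, 0, 5))  # 2 -- a contained in b
```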
def related(alpha: Rectangle, beta: Rectangle):
    """Checks if two rectangles lie close by or overlap."""
    return close(alpha, beta) or overlap(alpha, beta)


def close(alpha: Rectangle, beta: Rectangle, max_gap=14):
    """Checks if two rectangles are close to each other."""
    # FIXME: Parameterize via factory
    return adjacent(alpha, beta, tolerance=max_gap, strict=True)


def overlap(alpha: Rectangle, beta: Rectangle):
    """Checks if two rectangles overlap."""
    return intersection(alpha, beta) > 0


def shift(rectangle: Rectangle, dx: int, dy: int):
    """Shifts a rectangle by the given amount."""
    return rectangle.x1 + dx, rectangle.y1 + dy, rectangle.x2 + dx, rectangle.y2 + dy


@ -0,0 +1,90 @@
from __future__ import annotations

import itertools

import cv2
import numpy as np
from PIL import Image
from funcy import first, iterate, keep, lmap, repeatedly
from numpy import generic


def copy_and_normalize_channels(image):
    if isinstance(image, Image.Image):
        image = np.array(image)
    image = image.copy()
    try:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    except cv2.error:
        pass
    return image


def npconvert(ob):
    if isinstance(ob, generic):
        return ob.item()
    raise TypeError


def lift(fn):
    def lifted(coll):
        yield from map(fn, coll)

    return lifted


def star(fn):
    def starred(args):
        return fn(*args)

    return starred
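`lift` and `star` are small functional adapters; a self-contained sketch of what they do:

```python
def lift(fn):
    # Turn a function on items into a (lazy) function on collections.
    def lifted(coll):
        yield from map(fn, coll)
    return lifted

def star(fn):
    # Turn f(a, b, ...) into a function taking one tuple of arguments.
    def starred(args):
        return fn(*args)
    return starred

doubled = lift(lambda x: 2 * x)
print(list(doubled([1, 2, 3])))  # [2, 4, 6]

add = star(lambda a, b: a + b)
print(add((3, 4)))  # 7
```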
def lstarkeep(fn, coll):
    return list(starkeep(fn, coll))


def starkeep(fn, coll):
    yield from keep(star(fn), coll)


def until(cond, func, *args, **kwargs):
    return first(filter(cond, iterate(func, *args, **kwargs)))


def conj(x, xs):
    return [x, *xs]


def rconj(xs, x):
    return [*xs, x]
def make_merger_sentinel():
    number_of_records_so_far = -1

    def no_new_mergers(records):
        nonlocal number_of_records_so_far
        number_of_records_now = len(records)
        if number_of_records_now == number_of_records_so_far:
            return True
        else:
            number_of_records_so_far = number_of_records_now
            return False

    return no_new_mergers
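The sentinel closes over the record count from the previous call and reports convergence once a pass stops shrinking the collection; a sketched usage (the merge step here is a stand-in, not the module's real merge logic):

```python
def make_merger_sentinel():
    number_of_records_so_far = -1

    def no_new_mergers(records):
        nonlocal number_of_records_so_far
        if len(records) == number_of_records_so_far:
            return True
        number_of_records_so_far = len(records)
        return False

    return no_new_mergers

def merge_once(records):
    # Stand-in for a real merge pass: shrinks the list until it stabilizes.
    return records[:-1] if len(records) > 2 else records

records = [1, 2, 3, 4, 5]
sentinel = make_merger_sentinel()
while not sentinel(records):
    records = merge_once(records)
print(records)  # [1, 2]
```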
def zipmap(fn, boxes, n=2):
    rets = lmap(list, zip(*map(fn, boxes)))
    yield from repeatedly(lambda: [], n) if len(rets) < n else rets


def every_nth(n, iterable):
    return itertools.islice(iterable, 0, None, n)
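`every_nth` is a thin wrapper over `itertools.islice` with a step argument; for example:

```python
import itertools

def every_nth(n, iterable):
    # Yield every n-th element, starting from the first.
    return itertools.islice(iterable, 0, None, n)

print(list(every_nth(3, range(10))))  # [0, 3, 6, 9]
```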


@ -1,11 +1,9 @@
import os
from pyinfra.config.loader import load_settings # type: ignore
from cv_analysis.config import get_config
from cv_analysis.utils.display import save_image
settings = get_config()
CV_CONFIG = get_config()
class VisualLogger:
@ -41,4 +39,4 @@ class VisualLogger:
return self.level == "ALL"
vizlogger = VisualLogger(settings.logging.visual_logging_level, settings.logging.visual_logging_output_folder)
vizlogger = VisualLogger(CV_CONFIG.visual_logging_level, CV_CONFIG.visual_logging_output_folder)

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.


@ -1,30 +0,0 @@
#!/bin/bash
python_version=$1
gitlab_user=$2
gitlab_personal_access_token=$3
# cookiecutter https://gitlab.knecon.com/knecon/research/template-python-project.git --checkout master
# latest_dir=$(ls -td -- */ | head -n 1) # should be the dir cookiecutter just created
# cd $latest_dir
pyenv install $python_version
pyenv local $python_version
pyenv shell $python_version
pip install --upgrade pip
pip install poetry
poetry config installer.max-workers 10
# research package registry
poetry config repositories.gitlab-research https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
poetry config http-basic.gitlab-research ${gitlab_user} ${gitlab_personal_access_token}
# redactmanager package registry
poetry config repositories.gitlab-red https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
poetry config http-basic.gitlab-red ${gitlab_user} ${gitlab_personal_access_token}
poetry env use $(pyenv which python)
poetry install --with=dev
poetry update
source .venv/bin/activate


@ -28,4 +28,4 @@ services:
volumes:
- /opt/bitnami/rabbitmq/.rabbitmq/:/data/bitnami
volumes:
mdata:
mdata:


@ -1,4 +0,0 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 04e9c6c5d3e412413c2949e598da60dc
tags: 645f666f9bcd5a90fca523b33c5a78b7

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.


@ -1,657 +0,0 @@
<!DOCTYPE html>
<html lang="en" data-content_root="./" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>cv-analysis - Visual (CV-Based) Document Parsing &#8212; CV Analysis Service 2.5.2 documentation</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=a746c00c" />
<link rel="stylesheet" type="text/css" href="https://assets.readthedocs.org/static/css/badge_only.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script src="_static/documentation_options.js?v=afc61bbc"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'README';</script>
<script async="async" src="https://assets.readthedocs.org/static/javascript/readthedocs-doc-embed.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="cv_analysis package" href="modules/cv_analysis.html" />
<link rel="prev" title="Welcome to CV Analysis Service documentation!" href="index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- RTD Extra Head -->
<link rel="stylesheet" href="https://assets.readthedocs.org/static/css/readthedocs-doc-embed.css" type="text/css" />
<script type="application/json" id="READTHEDOCS_DATA">{"ad_free": "", "api_host": "", "builder": "sphinx", "canonical_url": "", "docroot": "", "features": {"docsearch_disabled": false}, "global_analytics_code": null, "language": "", "page": "README", "programming_language": "", "project": "", "source_suffix": ".md", "subprojects": {}, "theme": "", "user_analytics_code": null, "version": ""}</script>
<!--
Using this variable directly instead of using `JSON.parse` is deprecated.
The READTHEDOCS_DATA global variable will be removed in the future.
-->
<script type="text/javascript">
READTHEDOCS_DATA = JSON.parse(document.getElementById('READTHEDOCS_DATA').innerHTML);
</script>
<script type="text/javascript" src="https://assets.readthedocs.org/static/javascript/readthedocs-analytics.js" async="async"></script>
<!-- end RTD <extrahead> -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="index.html">
<img src="_static/logo.png" class="logo__image only-light" alt="CV Analysis Service 2.5.2 documentation - Home"/>
<script>document.write(`<img src="_static/logo.png" class="logo__image only-dark" alt="CV Analysis Service 2.5.2 documentation - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
cv-analysis - Visual (CV-Based) Document Parsing
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
cv_analysis package
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/serve.html">
serve module
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
cv-analysis - Visual (CV-Based) Document Parsing
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
cv_analysis package
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/serve.html">
serve module
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"></div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item active" aria-current="page">cv-analysis...</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="cv-analysis-visual-cv-based-document-parsing">
<h1>cv-analysis - Visual (CV-Based) Document Parsing<a class="headerlink" href="#cv-analysis-visual-cv-based-document-parsing" title="Link to this heading">#</a></h1>
<p>This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.</p>
<section id="api">
<h2>API<a class="headerlink" href="#api" title="Link to this heading">#</a></h2>
<p>Input message:</p>
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;targetFilePath&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;pdf&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;absolute file path&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;vlp_output&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;absolute file path&quot;</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="nt">&quot;responseFilePath&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;absolute file path&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;operation&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;table_image_inference&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Response is uploaded to the storage as specified in the <code class="docutils literal notranslate"><span class="pre">responseFilePath</span></code> field. The structure is as follows:</p>
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="w"> </span><span class="err">...</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;data&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;pageNum&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;bbox&#39;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">55.3407</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">247.0246</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">558.5602</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">598.0585</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="err">&#39;uuid&#39;</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="mi">2</span><span class="err">b</span><span class="mi">10</span><span class="err">c</span><span class="mi">1</span><span class="err">a</span><span class="mi">2-393</span><span class="err">c</span><span class="mi">-4</span><span class="kc">f</span><span class="err">ca</span><span class="mi">-</span><span class="err">b</span><span class="mf">9e3-0</span><span class="err">ad</span><span class="mi">5</span><span class="err">b</span><span class="mi">774</span><span class="err">ac</span><span class="mi">84</span><span class="err">&#39;</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;label&#39;</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="kc">ta</span><span class="err">ble&#39;</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;</span><span class="kc">ta</span><span class="err">bleLi</span><span class="kc">nes</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">16</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">1399</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">16</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="err">...</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="err">&#39;imageI</span><span class="kc">nf</span><span class="err">o&#39;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;heigh</span><span class="kc">t</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">693</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;wid</span><span class="kc">t</span><span class="err">h&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">1414</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="err">...</span>
<span class="w"> </span><span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="installation">
<h2>Installation<a class="headerlink" href="#installation" title="Link to this heading">#</a></h2>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>git<span class="w"> </span>clone<span class="w"> </span>ssh://git@git.iqser.com:2222/rr/cv-analysis.git
<span class="nb">cd</span><span class="w"> </span>cv-analysis
python<span class="w"> </span>-m<span class="w"> </span>venv<span class="w"> </span>env
<span class="nb">source</span><span class="w"> </span>env/bin/activate
pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span>.
pip<span class="w"> </span>install<span class="w"> </span>-r<span class="w"> </span>requirements.txt
dvc<span class="w"> </span>pull
</pre></div>
</div>
</section>
<section id="usage">
<h2>Usage<a class="headerlink" href="#usage" title="Link to this heading">#</a></h2>
<section id="as-an-api">
<h3>As an API<a class="headerlink" href="#as-an-api" title="Link to this heading">#</a></h3>
<p>The module provides functions for the individual tasks, each returning a collection of points appropriate to
the specific task.</p>
<section id="redaction-detection-api">
<h4>Redaction Detection (API)<a class="headerlink" href="#redaction-detection-api" title="Link to this heading">#</a></h4>
<p>The below snippet shows how to find the outlines of previous redactions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">cv_analysis.redaction_detection</span> <span class="kn">import</span> <span class="n">find_redactions</span>
<span class="kn">import</span> <span class="nn">pdf2image</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="n">pdf_path</span> <span class="o">=</span> <span class="o">...</span>
<span class="n">page_index</span> <span class="o">=</span> <span class="o">...</span>
<span class="n">page</span> <span class="o">=</span> <span class="n">pdf2image</span><span class="o">.</span><span class="n">convert_from_path</span><span class="p">(</span><span class="n">pdf_path</span><span class="p">,</span> <span class="n">first_page</span><span class="o">=</span><span class="n">page_index</span><span class="p">,</span> <span class="n">last_page</span><span class="o">=</span><span class="n">page_index</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">page</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">page</span><span class="p">)</span>
<span class="n">redaction_contours</span> <span class="o">=</span> <span class="n">find_redactions</span><span class="p">(</span><span class="n">page</span><span class="p">)</span>
</pre></div>
</div>
</section>
</section>
</section>
<section id="as-a-cli-tool">
<h2>As a CLI Tool<a class="headerlink" href="#as-a-cli-tool" title="Link to this heading">#</a></h2>
<p>Core API functionalities can be used through a CLI.</p>
<section id="table-parsing">
<h3>Table Parsing<a class="headerlink" href="#table-parsing" title="Link to this heading">#</a></h3>
<p>The table parsing utility detects and segments tables into individual cells.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">7</span><span class="w"> </span>--type<span class="w"> </span>table
</pre></div>
</div>
<p>The below image shows a parsed table, where each table cell has been detected individually.</p>
<p><img alt="Table Parsing Demonstration" src="_images/table_parsing.png" /></p>
</section>
<section id="redaction-detection-cli">
<h3>Redaction Detection (CLI)<a class="headerlink" href="#redaction-detection-cli" title="Link to this heading">#</a></h3>
<p>The redaction detection utility detects previous redactions in PDFs (filled black rectangles).</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">2</span><span class="w"> </span>--type<span class="w"> </span>redaction
</pre></div>
</div>
<p>The below image shows the detected redactions with green outlines.</p>
<p><img alt="Redaction Detection Demonstration" src="_images/redaction_detection.png" /></p>
</section>
<section id="layout-parsing">
<h3>Layout Parsing<a class="headerlink" href="#layout-parsing" title="Link to this heading">#</a></h3>
<p>The layout parsing utility detects elements such as paragraphs, tables and figures.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">7</span><span class="w"> </span>--type<span class="w"> </span>layout
</pre></div>
</div>
<p>The below image shows the detected layout elements on a page.</p>
<p><img alt="Layout Parsing Demonstration" src="_images/layout_parsing.png" /></p>
</section>
<section id="figure-detection">
<h3>Figure Detection<a class="headerlink" href="#figure-detection" title="Link to this heading">#</a></h3>
<p>The figure detection utility detects figures specifically, which can be missed by the generic layout parsing utility.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">3</span><span class="w"> </span>--type<span class="w"> </span>figure
</pre></div>
</div>
<p>The below image shows the detected figure on a page.</p>
<p><img alt="Figure Detection Demonstration" src="_images/figure_detection.png" /></p>
</section>
</section>
<section id="running-as-a-service">
<h2>Running as a service<a class="headerlink" href="#running-as-a-service" title="Link to this heading">#</a></h2>
<section id="building">
<h3>Building<a class="headerlink" href="#building" title="Link to this heading">#</a></h3>
<p>Build base image</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>bash<span class="w"> </span>setup/docker.sh
</pre></div>
</div>
<p>Build head image</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>Dockerfile<span class="w"> </span>-t<span class="w"> </span>cv-analysis<span class="w"> </span>.<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">BASE_ROOT</span><span class="o">=</span><span class="s2">&quot;&quot;</span>
</pre></div>
</div>
</section>
<section id="usage-service">
<h3>Usage (service)<a class="headerlink" href="#usage-service" title="Link to this heading">#</a></h3>
<p>Shell 1</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>run<span class="w"> </span>--rm<span class="w"> </span>--net<span class="o">=</span>host<span class="w"> </span>--rm<span class="w"> </span>cv-analysis
</pre></div>
</div>
<p>Shell 2</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/client_mock.py<span class="w"> </span>--pdf_path<span class="w"> </span>/path/to/a/pdf
</pre></div>
</div>
</section>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="index.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Welcome to CV Analysis Service documentation!</p>
</div>
</a>
<a class="right-next"
href="modules/cv_analysis.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">cv_analysis package</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#api">API</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#installation">Installation</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#usage">Usage</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#as-an-api">As an API</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#redaction-detection-api">Redaction Detection (API)</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#as-a-cli-tool">As a CLI Tool</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#table-parsing">Table Parsing</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#redaction-detection-cli">Redaction Detection (CLI)</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#layout-parsing">Layout Parsing</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#figure-detection">Figure Detection</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#running-as-a-service">Running as a service</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#building">Building</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#usage-service">Usage (service)</a></li>
</ul>
</li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection sourcelink">
<a href="_sources/README.md.txt">
<i class="fa-solid fa-file-lines"></i> Show Source
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright All rights reserved.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.3.7.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>


@ -1,178 +0,0 @@
# cv-analysis - Visual (CV-Based) Document Parsing
This repository implements computer-vision-based approaches for detecting and parsing visual features, such as tables or previous redactions, in documents.
## API
Input message:
```json
{
  "targetFilePath": {
    "pdf": "absolute file path",
    "vlp_output": "absolute file path"
  },
  "responseFilePath": "absolute file path",
  "operation": "table_image_inference"
}
```
The response is uploaded to storage at the path given in the `responseFilePath` field. Its structure is as follows:
```json
{
  ...,
  "data": [
    {
      "pageNum": 0,
      "bbox": {
        "x1": 55.3407,
        "y1": 247.0246,
        "x2": 558.5602,
        "y2": 598.0585
      },
      "uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84",
      "label": "table",
      "tableLines": [
        {
          "x1": 0,
          "y1": 16,
          "x2": 1399,
          "y2": 16
        },
        ...
      ],
      "imageInfo": {
        "height": 693,
        "width": 1414
      }
    },
    ...
  ]
}
```
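A response document of this shape can be consumed programmatically. The below sketch shows one way to iterate over the detected table boxes; the `iter_table_boxes` helper is hypothetical, not part of the package, and assumes the response has already been downloaded to a local file.

```python
import json


def iter_table_boxes(response_path):
    """Yield (page_num, (x1, y1, x2, y2)) for each entry in a response
    document shaped like the example above."""
    with open(response_path) as f:
        response = json.load(f)
    for entry in response.get("data", []):
        bbox = entry["bbox"]
        yield entry["pageNum"], (bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"])
```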
## Installation
```bash
git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git
cd cv-analysis
python -m venv env
source env/bin/activate
pip install -e .
pip install -r requirements.txt
dvc pull
```
## Usage
### As an API
The module provides functions for the individual tasks; each returns a collection of points whose exact form depends on the specific task.
#### Redaction Detection (API)
The below snippet shows how to find the outlines of previous redactions.
```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import numpy as np

pdf_path = ...    # path to the PDF to analyse
page_index = ...  # page number (pdf2image counts pages from 1)

# Render only the requested page and convert it to a numpy image.
page = pdf2image.convert_from_path(pdf_path, first_page=page_index, last_page=page_index)[0]
page = np.array(page)
redaction_contours = find_redactions(page)
```
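A common follow-up step is to reduce each detected outline to an axis-aligned bounding box. The below sketch assumes a contour is given as an `(N, 2)` array of `(x, y)` points; the `contour_to_bbox` helper is hypothetical and not part of the package.

```python
import numpy as np


def contour_to_bbox(contour):
    """Axis-aligned bounding box (x1, y1, x2, y2) of a contour given
    as an (N, 2) array-like of (x, y) points."""
    pts = np.asarray(contour).reshape(-1, 2)
    x1, y1 = pts.min(axis=0)  # top-left corner
    x2, y2 = pts.max(axis=0)  # bottom-right corner
    return int(x1), int(y1), int(x2), int(y2)
```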
### As a CLI Tool
Core API functionalities can be used through a CLI.
#### Table Parsing
The table parsing utility detects and segments tables into individual cells.
```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type table
```
The below image shows a parsed table, where each table cell has been detected individually.
![Table Parsing Demonstration](data/table_parsing.png)
#### Redaction Detection (CLI)
The redaction detection utility detects previous redactions in PDFs (filled black rectangles).
```bash
python scripts/annotate.py data/test_pdf.pdf 2 --type redaction
```
The below image shows the detected redactions with green outlines.
![Redaction Detection Demonstration](data/redaction_detection.png)
#### Layout Parsing
The layout parsing utility detects elements such as paragraphs, tables and figures.
```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type layout
```
The below image shows the detected layout elements on a page.
![Layout Parsing Demonstration](data/layout_parsing.png)
#### Figure Detection
The figure detection utility specifically detects figures, which can be missed by the generic layout parsing utility.
```bash
python scripts/annotate.py data/test_pdf.pdf 3 --type figure
```
The below image shows the detected figure on a page.
![Figure Detection Demonstration](data/figure_detection.png)
## Running as a service
### Building
Build base image
```bash
bash setup/docker.sh
```
Build head image
```bash
docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT=""
```
### Usage (service)
Shell 1
```bash
docker run --rm --net=host cv-analysis
```
Shell 2
```bash
python scripts/client_mock.py --pdf_path /path/to/a/pdf
```
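The mock client presumably sends the service a message shaped like the one in the API section above. The below sketch shows how such a request document could be composed; the `build_request` helper is hypothetical, and the paths passed to it are placeholders.

```python
import json


def build_request(pdf_path, vlp_output_path, response_path):
    """Compose an input message matching the API section above."""
    return {
        "targetFilePath": {
            "pdf": pdf_path,
            "vlp_output": vlp_output_path,
        },
        "responseFilePath": response_path,
        "operation": "table_image_inference",
    }


# Serialize before handing the message to the transport layer.
message = json.dumps(build_request("/abs/in.pdf", "/abs/vlp.json", "/abs/out.json"))
```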

@ -1,37 +0,0 @@
.. Keyword Extraction Service documentation master file, created by
   sphinx-quickstart on Mon Sep 12 12:04:24 2022.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

=============================================
Welcome to CV Analysis Service documentation!
=============================================

.. note::
   If you'd like to change the looks of things 👉 https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html

Table of Contents
-----------------

.. toctree::
   :maxdepth: 3
   :caption: README

   README.md

.. toctree::
   :maxdepth: 3
   :caption: Modules

   modules/cv_analysis
   modules/serve

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

@ -1,7 +0,0 @@
cv\_analysis.config module
==========================

.. automodule:: cv_analysis.config
   :members:
   :undoc-members:
   :show-inheritance:

Some files were not shown because too many files have changed in this diff.