Compare commits
3 Commits
| Author | SHA1 | Date |
|---|---|---|
| | e3f06da823 | |
| | c25c8d764e | |
| | dcab1e8616 | |
@@ -10,7 +10,7 @@ omit =
    */build_venv/*
    */incl/*
source =
    cv_analysis
    cv_analysis
relative_files = True
data_file = .coverage

@@ -46,4 +46,4 @@ ignore_errors = True
directory = reports

[xml]
output = reports/coverage.xml
output = reports/coverage.xml
@@ -97,4 +97,4 @@ target/
*.swp
*/*.swp
*/*/*.swp
*/*/*/*.swp
*/*/*/*.swp
@@ -1,10 +1,7 @@
[core]
    remote = azure_remote
    remote = vector
    autostage = true
['remote "vector"']
    url = ssh://vector.iqser.com/research/nonml_cv_doc_parsing/
    port = 22
['remote "azure_remote"']
    url = azure://cv-sa-dvc/
    connection_string = "DefaultEndpointsProtocol=https;AccountName=cvsacricket;AccountKey=KOuTAQ6Mp00ePTT5ObYmgaHlxwS1qukY4QU4Kuk7gy/vldneA+ZiKjaOpEFtqKA6Mtym2gQz8THy+ASts/Y1Bw==;EndpointSuffix=core.windows.net"
['remote "local"']
    url = ../dvc_local_remote
.gitignore (78 lines changed, vendored)
@@ -1,52 +1,28 @@
# Environments
.env
.venv
env/
venv/
.pytest*
.python-version
.DS_Store

# Project folders
scratch/
*.vscode/
.idea
*_app
*pytest_cache
*joblib
*tmp
*profiling
*logs
*docker
*drivers
*bamboo-specs/target

# Python specific files
__pycache__/
*.py[cod]
*.ipynb
*.ipynb_checkpoints

# file extensions
*.log
*.csv
*.json
*.pkl
*.profile
*.cbm

# temp files
*.swp
*~
*.un~

# keep files
!notebooks/*.ipynb

# keep folders
!secrets
!data/*
!drivers

# unignore files
!bom.*
*.egg-info/
deskew_model/
build_venv/
/pdfs/
/results/
/pdfs/
/env/
/.idea/
/.idea/.gitignore
/.idea/misc.xml
/.idea/inspectionProfiles/profiles_settings.xml
/.idea/table_parsing.iml
/.idea/vcs.xml
/results/
/table_parsing.egg-info
/target/
/tests/
/cv_analysis.egg-info/dependency_links.txt
/cv_analysis.egg-info/PKG-INFO
/cv_analysis.egg-info/SOURCES.txt
/cv_analysis.egg-info/top_level.txt
/.vscode/
/cv_analysis/test/test_data/example_pages.json
/data/metadata_testing_files.csv
.coverage
/data/
/venv/
@@ -1,30 +0,0 @@
include:
  - project: "Gitlab/gitlab"
    ref: 0.3.0
    file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"

variables:
  NEXUS_PROJECT_DIR: red
  IMAGENAME: "${CI_PROJECT_NAME}"

#################################
# temp. disable integration tests, b/c they don't cover the CV analysis case yet
trigger integration tests:
  rules:
    - when: never

release build:
  stage: release
  needs:
    - job: set custom version
      artifacts: true
      optional: true
    - job: calculate patch version
      artifacts: true
      optional: true
    - job: calculate minor version
      artifacts: true
      optional: true
    - job: build docker nexus
      artifacts: true
#################################
@@ -1,35 +0,0 @@
# CI for services, check gitlab repo for python package CI
include:
  - project: "Gitlab/gitlab"
    ref: main
    file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
  - project: "Gitlab/gitlab"
    ref: main
    file: "/ci-templates/research/docs.gitlab-ci.yml"

# set project variables here
variables:
  NEXUS_PROJECT_DIR: red # subfolder in Nexus docker-gin where your container will be stored
  IMAGENAME: $CI_PROJECT_NAME # if the project URL is gitlab.example.com/group-name/project-1, CI_PROJECT_NAME is project-1

pages:
  only:
    - master # KEEP THIS, necessary because `master` branch and not `main` branch

###################
# INTEGRATION TESTS
trigger-integration-tests:
  extends: .integration-tests
  # ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
  # needs:
  #   - job: docker-build::model_name
  #     artifacts: true
  rules:
    - when: never

#########
# RELEASE
release:
  extends: .release
  needs:
    - !reference [.needs-versioning, needs] # leave this line as is
@@ -1,61 +0,0 @@
import subprocess
import sys
from pathlib import Path

import semver
from loguru import logger
from semver.version import Version

logger.remove()
logger.add(sys.stdout, level="INFO")


def bashcmd(cmds: list) -> str:
    try:
        logger.debug(f"running: {' '.join(cmds)}")
        return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
    except:
        logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
        raise


def get_highest_existing_git_version_tag() -> str:
    """Get highest versions from git tags depending on bump level"""
    try:
        git_tags = bashcmd(["git", "tag", "-l"]).split()
        semver_compat_tags = list(filter(Version.is_valid, git_tags))
        highest_git_version_tag = max(semver_compat_tags, key=semver.version.Version.parse)
        logger.info(f"Highest git version tag: {highest_git_version_tag}")
        return highest_git_version_tag
    except:
        logger.warning("Error getting git version tags")
        raise


def auto_bump_version() -> bool:
    active = Path(".autoversion").is_file()
    logger.debug(f"Automated version bump is set to '{active}'")
    return active


def main() -> None:
    poetry_project_version = bashcmd(["poetry", "version", "-s"])

    logger.info(f"Poetry project version: {poetry_project_version}")

    highest_git_version_tag = get_highest_existing_git_version_tag()

    comparison_result = semver.compare(poetry_project_version, highest_git_version_tag)

    if comparison_result in (-1, 0):
        logger.warning("Poetry version must be greater than git tag version.")
        if auto_bump_version():
            logger.info(bashcmd(["poetry", "version", highest_git_version_tag]))
            sys.exit(0)
        sys.exit(1)
    else:
        logger.info(f"All good: {poetry_project_version} > {highest_git_version_tag}")


if __name__ == "__main__":
    main()
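For reference, a minimal sketch of the comparison this hook hinges on (hypothetical version strings; assumes only the Python `semver` package the hook already imports):

```python
import semver

# semver.compare returns -1, 0, or 1; the hook fails on -1 and 0:
print(semver.compare("1.2.3", "1.3.0"))  # -1: project version behind the git tag
print(semver.compare("1.3.0", "1.3.0"))  #  0: versions equal, still rejected
print(semver.compare("1.4.0", "1.3.0"))  #  1: project version ahead, hook passes
```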
@@ -1,72 +0,0 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
exclude: ^(docs/|notebooks/|data/|src/configs/|tests/|.hooks/|bom.json)
default_language_version:
  python: python3.10
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        args: [--unsafe] # needed for .gitlab-ci.yml
      - id: check-toml
      - id: detect-private-key
      - id: check-added-large-files
        args: ['--maxkb=10000']
      - id: check-case-conflict
      - id: mixed-line-ending

  # - repo: https://github.com/pre-commit/mirrors-pylint
  #   rev: v3.0.0a5
  #   hooks:
  #     - id: pylint
  #       args:
  #         - --disable=C0111,R0903,E0401
  #         - --max-line-length=120

  - repo: https://github.com/pre-commit/mirrors-isort
    rev: v5.10.1
    hooks:
      - id: isort
        args:
          - --profile black

  - repo: https://github.com/psf/black
    rev: 24.10.0
    hooks:
      - id: black
        # exclude: ^(docs/|notebooks/|data/|src/secrets/)
        args:
          - --line-length=120

  - repo: https://github.com/compilerla/conventional-pre-commit
    rev: v4.0.0
    hooks:
      - id: conventional-pre-commit
        pass_filenames: false
        stages: [commit-msg]
        # args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test]

  - repo: local
    hooks:
      - id: version-checker
        name: version-checker
        entry: python .hooks/poetry_version_check.py
        language: python
        always_run: true
        additional_dependencies:
          - "semver"
          - "loguru"

  # - repo: local
  #   hooks:
  #     - id: docker-build-test
  #       name: testing docker build
  #       entry: ./scripts/ops/docker-compose-build-run.sh
  #       language: script
  #       # always_run: true
  #       pass_filenames: false
  #       args: []
  #       stages: [pre-commit]
Dockerfile (84 lines changed)
@@ -1,78 +1,30 @@
###############
# BUILDER IMAGE
FROM python:3.10-slim as builder
FROM python:3.10

ARG GITLAB_USER
ARG GITLAB_ACCESS_TOKEN
RUN python -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"

ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
RUN python -m pip install --upgrade pip

ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
ARG POETRY_SOURCE_REF_RED=gitlab-red
WORKDIR /app/service

ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
COPY ./requirements.txt ./requirements.txt
RUN python3 -m pip install -r requirements.txt

ARG VERSION=dev
COPY ./incl/pyinfra/requirements.txt ./incl/pyinfra/requirements.txt
RUN python -m pip install -r incl/pyinfra/requirements.txt

LABEL maintainer="Research <research@knecon.com>"
LABEL version="${VERSION}"
COPY ./incl/pdf2image/requirements.txt ./incl/pdf2image/requirements.txt
RUN python -m pip install -r incl/pdf2image/requirements.txt

WORKDIR /app
COPY ./incl ./incl

###########
# ENV SETUP
ENV PYTHONDONTWRITEBYTECODE=true
ENV PYTHONUNBUFFERED=true
ENV POETRY_HOME=/opt/poetry
ENV PATH="$POETRY_HOME/bin:$PATH"
RUN python3 -m pip install -e incl/pyinfra
RUN python3 -m pip install -e incl/pdf2image

RUN apt-get update && \
    apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

RUN curl -sSL https://install.python-poetry.org | python3 -
RUN poetry --version

COPY pyproject.toml poetry.lock ./

RUN poetry config virtualenvs.create true && \
    poetry config virtualenvs.in-project true && \
    poetry config installer.max-workers 10 && \
    poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
    poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
    poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
    poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
    poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
    poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
    poetry install --without=dev,docs,test -vv --no-interaction --no-root

##################
# COPY SOURCE CODE
COPY ./config ./config
COPY ./src ./src
COPY ./cv_analysis ./cv_analysis
COPY ./setup.py ./setup.py

###############
# WORKING IMAGE
FROM python:3.10-slim
RUN python3 -m pip install -e .

# COPY BILL OF MATERIALS (BOM)
COPY bom.json /bom.json

# COPY SOURCE CODE FROM BUILDER IMAGE
COPY --from=builder /app /app

WORKDIR /app

ENV PATH="/app/.venv/bin:$PATH"

############
# NETWORKING
EXPOSE 5000
EXPOSE 8080

################
# LAUNCH COMMAND
CMD [ "python", "src/serve.py"]
CMD ["python3", "-u", "src/serve.py"]
Makefile (94 lines changed)
@@ -1,94 +0,0 @@
.PHONY: \
	poetry in-project-venv dev-env use-env install install-dev tests \
	update-version sync-version-with-git \
	docker docker-build-run docker-build docker-run \
	docker-rm docker-rm-container docker-rm-image \
	pre-commit get-licenses prep-commit \
	docs sphinx_html sphinx_apidoc bom
.DEFAULT_GOAL := run

export DOCKER=docker
export DOCKERFILE=Dockerfile
export IMAGE_NAME=cv_analysis_service-image
export CONTAINER_NAME=cv_analysis_service-container
export HOST_PORT=9999
export CONTAINER_PORT=9999
export PYTHON_VERSION=python3.10

# all commands should be executed in the root dir of the project,
# specific environments should be deactivated

poetry: in-project-venv use-env dev-env

in-project-venv:
	poetry config virtualenvs.in-project true

use-env:
	poetry env use ${PYTHON_VERSION}

dev-env:
	poetry install --with dev && poetry update

install:
	poetry add $(pkg)

install-dev:
	poetry add --dev $(pkg)

requirements:
	poetry export --without-hashes --output requirements.txt

update-version:
	poetry version prerelease

sync-version-with-git:
	git pull -p && poetry version $(git rev-list --tags --max-count=1 | git describe --tags --abbrev=0)

bom:
	cyclonedx-py poetry -o bom.json

docker: docker-rm docker-build-run

docker-build-run: docker-build docker-run

docker-build:
	$(DOCKER) build \
		--no-cache --progress=plain \
		-t $(IMAGE_NAME) -f $(DOCKERFILE) \
		--build-arg USERNAME=${USERNAME} \
		--build-arg TOKEN=${GITLAB_TOKEN} \
		.

docker-run:
	$(DOCKER) run -it --rm -p $(HOST_PORT):$(CONTAINER_PORT)/tcp --name $(CONTAINER_NAME) $(IMAGE_NAME)

docker-rm: docker-rm-container docker-rm-image

docker-rm-container:
	-$(DOCKER) rm $(CONTAINER_NAME)

docker-rm-image:
	-$(DOCKER) image rm $(IMAGE_NAME)

tests:
	poetry run pytest ./tests

prep-commit:
	docs get-license sync-version-with-git update-version pre-commit

pre-commit:
	pre-commit run --all-files

get-licenses:
	pip-licenses --format=json --order=license --with-urls > pkg-licenses.json

docs: sphinx_apidoc sphinx_html

sphinx_html:
	poetry run sphinx-build -b html docs/source/ docs/build/html -E -a

sphinx_apidoc:
	cp ./README.md ./docs/source/README.md && cp -r ./data ./docs/source/data/ && poetry run sphinx-apidoc ./src -o ./docs/source/modules --no-toc --module-first --follow-links --separate --force

bom:
	cyclonedx-py poetry -o bom.json
README.md (57 lines changed)
@@ -1,60 +1,8 @@
# cv-analysis - Visual (CV-Based) Document Parsing
# cv-analysis — Visual (CV-Based) Document Parsing

parse_pdf()
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.

## API

Input message:

```json
{
    "targetFilePath": {
        "pdf": "absolute file path",
        "vlp_output": "absolute file path"
    },
    "responseFilePath": "absolute file path",
    "operation": "table_image_inference"
}
```

Response is uploaded to the storage as specified in the responseFilePath field. The structure is as follows:

```json
{
    ...,
    "data": [
        {
            "pageNum": 0,
            "bbox": {
                "x1": 55.3407,
                "y1": 247.0246,
                "x2": 558.5602,
                "y2": 598.0585
            },
            "uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84",
            "label": "table",
            "tableLines": [
                {
                    "x1": 0,
                    "y1": 16,
                    "x2": 1399,
                    "y2": 16
                },
                ...
            ],
            "imageInfo": {
                "height": 693,
                "width": 1414
            }
        },
        ...
    ]
}
```

## Installation

```bash
```

@@ -83,9 +31,10 @@ The below snippet shows how to find the outlines of previous redactions.

```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import pdf2image
import numpy as np


pdf_path = ...
page_index = ...
```
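A minimal end-to-end sketch of the call above, with the elided values filled in by hypothetical placeholders (assumes pdf2image's `convert_from_path` and the 200 DPI resolution that `find_redactions`' area scaling expects):

```python
import numpy as np
import pdf2image

from cv_analysis.redaction_detection import find_redactions

pdf_path = "scanned_contract.pdf"  # hypothetical input file
page_index = 0

# Convert one page to a numpy array at 200 DPI, then look for filled,
# box-like contours that are large enough to be previous redactions.
pages = pdf2image.convert_from_path(pdf_path, dpi=200)
image = np.array(pages[page_index])
redactions = find_redactions(image)  # list of contours; [] if nothing matches
print(f"{len(redactions)} candidate redaction outlines found")
```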
bamboo-specs/pom.xml (40 lines, new file)
@@ -0,0 +1,40 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.atlassian.bamboo</groupId>
        <artifactId>bamboo-specs-parent</artifactId>
        <version>7.1.2</version>
        <relativePath/>
    </parent>

    <artifactId>bamboo-specs</artifactId>
    <version>1.0.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <sonar.skip>true</sonar.skip>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.atlassian.bamboo</groupId>
            <artifactId>bamboo-specs-api</artifactId>
        </dependency>
        <dependency>
            <groupId>com.atlassian.bamboo</groupId>
            <artifactId>bamboo-specs</artifactId>
        </dependency>

        <!-- Test dependencies -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <!-- run 'mvn test' to perform offline validation of the plan -->
    <!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
</project>
bamboo-specs/src/main/java/buildjob/PlanSpec.java (178 lines, new file)
@@ -0,0 +1,178 @@
package buildjob;

import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;

import java.time.LocalTime;

import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
import com.atlassian.bamboo.specs.api.builders.plan.Job;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
import com.atlassian.bamboo.specs.api.builders.project.Project;
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
import com.atlassian.bamboo.specs.builders.task.CleanWorkingDirectoryTask;
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
import com.atlassian.bamboo.specs.builders.trigger.ScheduledTrigger;
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
import com.atlassian.bamboo.specs.api.builders.Variable;
import com.atlassian.bamboo.specs.util.BambooServer;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;

/**
 * Plan configuration for Bamboo.
 * Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
 */
@BambooSpec
public class PlanSpec {

    private static final String SERVICE_NAME = "cv-analysis";

    private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-", "").replaceAll("_", "");

    /**
     * Run main to publish plan on Bamboo
     */
    public static void main(final String[] args) throws Exception {
        // By default credentials are read from the '.credentials' file.
        BambooServer bambooServer = new BambooServer("http://localhost:8085");

        Plan plan = new PlanSpec().createDockerBuildPlan();
        bambooServer.publish(plan);
        PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
        bambooServer.publish(planPermission);

        Plan secPlan = new PlanSpec().createSecBuild();
        bambooServer.publish(secPlan);
        PlanPermissions secPlanPermission = new PlanSpec().createPlanPermission(secPlan.getIdentifier());
        bambooServer.publish(secPlanPermission);
    }

    private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
        Permissions permission = new Permissions()
                .userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
                .groupPermissions("research", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
                .groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
                .groupPermissions("QA", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
                .loggedInUserPermissions(PermissionType.VIEW)
                .anonymousUserPermissionView();
        return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
    }

    private Project project() {
        return new Project()
                .name("RED")
                .key(new BambooKey("RED"));
    }

    public Plan createDockerBuildPlan() {
        return new Plan(
                project(),
                SERVICE_NAME, new BambooKey(SERVICE_KEY))
                // .description("Docker build for cv-analysis.")
                // .variables()
                .stages(new Stage("Build Stage")
                        .jobs(
                                new Job("Build Job", new BambooKey("BUILD"))
                                        .tasks(
                                                new CleanWorkingDirectoryTask()
                                                        .description("Clean working directory.")
                                                        .enabled(true),
                                                new VcsCheckoutTask()
                                                        .description("Checkout default repository.")
                                                        .checkoutItems(new CheckoutItem().defaultRepository()),
                                                new ScriptTask()
                                                        .description("Set config and keys.")
                                                        .location(Location.FILE)
                                                        .fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
                                                new ScriptTask()
                                                        .description("Build Docker container.")
                                                        .location(Location.FILE)
                                                        .fileFromPath("bamboo-specs/src/main/resources/scripts/docker-build.sh")
                                                        .argument(SERVICE_NAME),
                                                new InjectVariablesTask()
                                                        .description("Inject git tag.")
                                                        .path("git.tag")
                                                        .namespace("g")
                                                        .scope(InjectVariablesScope.LOCAL),
                                                new VcsTagTask()
                                                        .description("${bamboo.g.gitTag}")
                                                        .tagName("${bamboo.g.gitTag}")
                                                        .defaultRepository())
                                        .dockerConfiguration(
                                                new DockerConfiguration()
                                                        .image("nexus.iqser.com:5001/infra/release_build:4.5.0")
                                                        .volume("/var/run/docker.sock", "/var/run/docker.sock")),
                                new Job("Licence Job", new BambooKey("LICENCE"))
                                        .enabled(false)
                                        .tasks(
                                                new VcsCheckoutTask()
                                                        .description("Checkout default repository.")
                                                        .checkoutItems(new CheckoutItem().defaultRepository()),
                                                new ScriptTask()
                                                        .description("Build licence.")
                                                        .location(Location.FILE)
                                                        .fileFromPath("bamboo-specs/src/main/resources/scripts/create-licence.sh"))
                                        .dockerConfiguration(
                                                new DockerConfiguration()
                                                        .image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
                                                        .volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
                                                        .volume("/var/run/docker.sock", "/var/run/docker.sock"))))
                .linkedRepositories("RR / " + SERVICE_NAME)
                .triggers(
                        new BitbucketServerTrigger())
                .planBranchManagement(
                        new PlanBranchManagement()
                                .createForVcsBranch()
                                .delete(
                                        new BranchCleanup()
                                                .whenInactiveInRepositoryAfterDays(14))
                                .notificationForCommitters());
    }

    public Plan createSecBuild() {
        return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
                .stages(new Stage("Default Stage").jobs(
                        new Job("Sonar Job", new BambooKey("SONAR"))
                                .tasks(
                                        new CleanWorkingDirectoryTask()
                                                .description("Clean working directory.")
                                                .enabled(true),
                                        new VcsCheckoutTask()
                                                .description("Checkout default repository.")
                                                .checkoutItems(new CheckoutItem().defaultRepository()),
                                        new ScriptTask()
                                                .description("Set config and keys.")
                                                .location(Location.FILE)
                                                .fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
                                        new ScriptTask()
                                                .description("Run Sonarqube scan.")
                                                .location(Location.FILE)
                                                .fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-scan.sh")
                                                .argument(SERVICE_NAME))
                                .dockerConfiguration(
                                        new DockerConfiguration()
                                                .image("nexus.iqser.com:5001/infra/release_build:4.2.0")
                                                .volume("/var/run/docker.sock", "/var/run/docker.sock"))))
                .linkedRepositories("RR / " + SERVICE_NAME)
                .triggers(
                        new ScheduledTrigger()
                                .scheduleOnceDaily(LocalTime.of(23, 00)))
                .planBranchManagement(
                        new PlanBranchManagement()
                                .createForVcsBranchMatching("release.*")
                                .notificationForCommitters());
    }
}
bamboo-specs/src/main/resources/scripts/create-licence.sh (19 lines, new executable file)
@@ -0,0 +1,19 @@
#!/bin/bash
set -e

if [[ "${bamboo_version_tag}" != "dev" ]]
then
    ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
        -f ${bamboo_build_working_directory}/pom.xml \
        versions:set \
        -DnewVersion=${bamboo_version_tag}

    ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
        -f ${bamboo_build_working_directory}/pom.xml \
        -B clean deploy \
        -e -DdeployAtEnd=true \
        -Dmaven.wagon.http.ssl.insecure=true \
        -Dmaven.wagon.http.ssl.allowall=true \
        -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
        -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
fi
bamboo-specs/src/main/resources/scripts/docker-build.sh (53 lines, new executable file)
@@ -0,0 +1,53 @@
#!/bin/bash
set -e

SERVICE_NAME=$1

if [[ "$bamboo_planRepository_branchName" == "master" ]]
then
    branchVersion=$(cat version.yaml | grep -Eo "version: .*" | sed -s 's|version: \(.*\)\..*\..*|\1|g')
    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
    newVersion="$(semver $latestVersion -p -i minor)"
    echo "new release on master with version $newVersion"
elif [[ "$bamboo_planRepository_branchName" == release* ]]
then
    branchVersion=$(echo $bamboo_planRepository_branchName | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
    newVersion="$(semver $latestVersion -p -i patch)"
    echo "new release on $bamboo_planRepository_branchName with version $newVersion"
elif [[ "${bamboo_version_tag}" != "dev" ]]
then
    newVersion="${bamboo_version_tag}"
    echo "new special version build with $newVersion"
else
    newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
    echo "gitTag=${newVersion}" > git.tag
    echo "dev build with tag ${newVersion}"
    python3 -m venv build_venv
    source build_venv/bin/activate
    python3 -m pip install --upgrade pip

    pip install dvc
    pip install 'dvc[ssh]'
    dvc pull

    echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
    echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
    docker build -f Dockerfile .
    exit 0
fi

echo "gitTag=${newVersion}" > git.tag

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

pip install dvc
pip install 'dvc[ssh]'
dvc pull

echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}
bamboo-specs/src/main/resources/scripts/key-prepare.sh (8 lines, new executable file)
@@ -0,0 +1,8 @@
#!/bin/bash
set -e

mkdir -p ~/.ssh
echo "${bamboo_agent_ssh}" | base64 -d >> ~/.ssh/id_rsa
echo "host vector.iqser.com" > ~/.ssh/config
echo "  user bamboo-agent" >> ~/.ssh/config
chmod 600 ~/.ssh/config ~/.ssh/id_rsa
bamboo-specs/src/main/resources/scripts/sonar-scan.sh (67 lines, new executable file)
@@ -0,0 +1,67 @@
#!/bin/bash
set -e

export JAVA_HOME=/usr/bin/sonar-scanner/jre

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

echo "dev setup for unit test and coverage"

pip install -e incl/pyinfra
pip install -r incl/pyinfra/requirements.txt

pip install -e incl/pdf2image
pip install -r incl/pdf2image/requirements.txt

pip install -e .
pip install -r requirements.txt


echo "DVC pull step"
dvc pull

echo "coverage calculation"
coverage run -m pytest
echo "coverage report generation"
coverage report -m
coverage xml

SERVICE_NAME=$1

echo "dependency-check:aggregate"
mkdir -p reports
dependency-check --enableExperimental -f JSON -f HTML -f XML \
    --disableAssembly -s . -o reports --project $SERVICE_NAME --exclude ".git/**" --exclude "venv/**" \
    --exclude "build_venv/**" --exclude "**/__pycache__/**"

if [[ -z "${bamboo_repository_pr_key}" ]]
then
    echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
    /usr/bin/sonar-scanner/bin/sonar-scanner -X \
        -Dsonar.projectKey=RED_$SERVICE_NAME \
        -Dsonar.sources=src,cv_analysis \
        -Dsonar.host.url=https://sonarqube.iqser.com \
        -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
        -Dsonar.branch.name=${bamboo_planRepository_1_branch} \
        -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
        -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
        -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
        -Dsonar.python.coverage.reportPaths=reports/coverage.xml

else
    echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
    /usr/bin/sonar-scanner/bin/sonar-scanner \
        -Dsonar.projectKey=RED_$SERVICE_NAME \
        -Dsonar.sources=src,cv_analysis \
        -Dsonar.host.url=https://sonarqube.iqser.com \
        -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
        -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
        -Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
        -Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
        -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
        -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
        -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
        -Dsonar.python.coverage.reportPaths=reports/coverage.xml
fi
bamboo-specs/src/test/java/buildjob/PlanSpecTest.java (22 lines, new file)
@@ -0,0 +1,22 @@
package buildjob;


import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
import org.junit.Test;

public class PlanSpecTest {
    @Test
    public void checkYourPlanOffline() throws PropertiesValidationException {
        Plan plan = new PlanSpec().createDockerBuildPlan();

        EntityPropertiesBuilders.build(plan);
    }

    @Test
    public void checkYourSecPlanOffline() throws PropertiesValidationException {
        Plan secPlan = new PlanSpec().createSecBuild();
        EntityPropertiesBuilders.build(secPlan);
    }
}
@@ -1,67 +0,0 @@

[asyncio]
max_concurrent_tasks = 10

[dynamic_tenant_queues]
enabled = true

[metrics.prometheus]
enabled = true
prefix = "redactmanager_cv_analysis_service"

[tracing]
enabled = true
# possible values "opentelemetry" | "azure_monitor" (Expects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
type = "azure_monitor"

[tracing.opentelemetry]
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
service_name = "redactmanager_cv_analysis_service"
exporter = "otlp"

[webserver]
host = "0.0.0.0"
port = 8080

[rabbitmq]
host = "localhost"
port = 5672
username = ""
password = ""
heartbeat = 60
# Has to be a divisor of heartbeat, and shouldn't be too big, since only in these intervals queue interactions happen (like receiving new messages)
# This is also the minimum time the service needs to process a message
connection_sleep = 5
input_queue = "request_queue"
output_queue = "response_queue"
dead_letter_queue = "dead_letter_queue"

tenant_event_queue_suffix = "_tenant_event_queue"
tenant_event_dlq_suffix = "_tenant_events_dlq"
tenant_exchange_name = "tenants-exchange"
queue_expiration_time = 300000 # 5 minutes in milliseconds
service_request_queue_prefix = "cv_analysis_request_queue"
service_request_exchange_name = "cv_analysis_request_exchange"
service_response_exchange_name = "cv_analysis_response_exchange"
service_dlq_name = "cv_analysis_dlq"

[storage]
backend = "s3"

[storage.s3]
bucket = "redaction"
endpoint = "http://127.0.0.1:9000"
key = ""
secret = ""
region = "eu-central-1"

[storage.azure]
container = "redaction"
connection_string = ""

[storage.tenant_server]
public_key = ""
endpoint = "http://tenant-user-management:8081/internal-api/tenants"

[kubernetes]
pod_name = "test_pod"
@@ -1,19 +0,0 @@
[logging]
level = "INFO"
visual_logging_level = "DISABLED"
visual_logging_output_folder = "/tmp/debug"

[table_parsing]
skip_pages_without_images = true

[paths]
root = "@format {env[ROOT_PATH]}"
dvc_data_dir = "${paths.root}/data"
pdf_for_testing = "${paths.dvc_data_dir}/pdfs_for_testing"
png_for_testing = "${paths.dvc_data_dir}/pngs_for_testing"
png_figures_detected = "${paths.png_for_testing}/figures_detected"
png_tables_detected = "${paths.png_for_testing}/tables_detected_by_tp"
hashed_pdfs_for_testing = "${paths.pdf_for_testing}/hashed"
metadata_test_files = "${paths.dvc_data_dir}/metadata_testing_files.csv"
test_dir = "${paths.dvc_data_dir}/test"
test_data_dir = "${paths.dvc_data_dir}/test/test_data"
cv_analysis/config.py (30 lines, new file)
@@ -0,0 +1,30 @@
import os


def get_config():
    return Config()


class Config:
    def __init__(self):
        self.logging_level_root = os.environ.get("LOGGING_LEVEL_ROOT", "INFO")

        # visual_logging_level: NOTHING > INFO > DEBUG > ALL
        self.visual_logging_level = "DISABLED"
        self.visual_logging_output_folder = "/tmp/debug"

        # locations
        # FIXME: is everything here necessary?
        root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        self.dvc_data_dir = os.path.join(root, "data")
        self.pdf_for_testing = os.path.join(self.dvc_data_dir, "pdfs_for_testing")
        self.png_for_testing = os.path.join(self.dvc_data_dir, "pngs_for_testing")
        self.png_figures_detected = os.path.join(self.png_for_testing, "figures_detected")
        self.png_tables_detected = os.path.join(self.png_for_testing, "tables_detected_by_tp")
        self.hashed_pdfs_for_testing = os.path.join(self.pdf_for_testing, "hashed")
        self.metadata_test_files = os.path.join(self.dvc_data_dir, "metadata_testing_files.csv")
        self.test_dir = os.path.join(root, "test")
        self.test_data_dir = os.path.join(self.test_dir, "test_data")

    def __getitem__(self, key):
        return self.__getattribute__(key)
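As a quick illustration of the accessor shim at the bottom of this file (hypothetical session), `__getitem__` simply delegates to attribute lookup, so both access styles are interchangeable:

```python
from cv_analysis.config import get_config

cfg = get_config()
# Attribute access and item access resolve to the same value:
assert cfg.dvc_data_dir == cfg["dvc_data_dir"]
print(cfg.visual_logging_output_folder)  # "/tmp/debug"
```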
@@ -6,15 +6,15 @@ import numpy as np
from cv_analysis.figure_detection.figures import detect_large_coherent_structures
from cv_analysis.figure_detection.text import remove_primary_text_regions
from cv_analysis.utils.filters import (
    has_acceptable_format,
    is_large_enough,
    has_acceptable_format,
    is_not_too_large,
)
from cv_analysis.utils.postprocessing import remove_included
from cv_analysis.utils.structures import Rectangle


def detect_figures(image: np.ndarray):
def detect_figures(image: np.array):
    max_area = image.shape[0] * image.shape[1] * 0.99
    min_area = 5000
    max_width_to_height_ratio = 6
@@ -24,10 +24,9 @@ def detect_figures(image: np.ndarray):
    cnts = detect_large_coherent_structures(image)
    cnts = filter(figure_filter, cnts)

    # rects = map(compose(Rectangle.from_xywh, cv2.boundingRect), (cnts))

    bounding_rects = map(cv2.boundingRect, cnts)
    rects: list[Rectangle] = remove_included(map(Rectangle.from_xywh, rects))
    rects = map(cv2.boundingRect, cnts)
    rects = map(Rectangle.from_xywh, rects)
    rects = remove_included(rects)

    return rects
@@ -2,7 +2,7 @@ import cv2
import numpy as np


def detect_large_coherent_structures(image: np.ndarray):
def detect_large_coherent_structures(image: np.array):
    """Detects large coherent structures on an image.
    Expects an image with binary color space (e.g. threshold applied).
@@ -1,5 +1,5 @@
import itertools
from itertools import compress, starmap
from itertools import compress
from itertools import starmap
from operator import __and__

import cv2
@@ -7,12 +7,10 @@ import numpy as np

from cv_analysis.utils.connect_rects import connect_related_rects2
from cv_analysis.utils.postprocessing import (
    has_no_parent,
    remove_included,
    remove_overlapping,
    has_no_parent,
)
from cv_analysis.utils.structures import Rectangle
from cv_analysis.utils.visual_logging import vizlogger


# could be dynamic parameter if the scan is noisy
@@ -48,7 +46,7 @@ def fill_in_component_area(image, rect):
    return ~image


def parse_layout(image: np.ndarray):
def parse_layout(image: np.array):
    image = image.copy()
    image_ = image.copy()

@@ -77,7 +75,8 @@ def parse_layout(image: np.ndarray):
    rects = list(map(Rectangle.from_xywh, rects))
    rects = remove_included(rects)

    rects = connect_related_rects2(map(lambda r: r.xywh(), rects))
    rects = map(lambda r: r.xywh(), rects)
    rects = connect_related_rects2(rects)
    rects = list(map(Rectangle.from_xywh, rects))
    rects = remove_included(rects)
@@ -2,9 +2,10 @@ from functools import partial

import cv2
import numpy as np
from iteration_utilities import first, starfilter  # type: ignore
from iteration_utilities import first
from iteration_utilities._iteration_utilities import starfilter

from cv_analysis.utils.filters import is_boxy, is_filled, is_large_enough
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
from cv_analysis.utils.visual_logging import vizlogger


@@ -12,7 +13,7 @@ def is_likely_redaction(contour, hierarchy, min_area):
    return is_filled(hierarchy) and is_boxy(contour) and is_large_enough(contour, min_area)


def find_redactions(image: np.ndarray, min_normalized_area=200000):
def find_redactions(image: np.array, min_normalized_area=200000):
    vizlogger.debug(image, "redactions01_start.png")
    min_normalized_area /= 200  # Assumes 200 DPI PDF -> image conversion resolution

@@ -29,14 +30,13 @@ def find_redactions(image: np.ndarray, min_normalized_area=200000):
    contours, hierarchies = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

    try:
        return list(
            map(
                first,
                starfilter(
                    partial(is_likely_redaction, min_area=min_normalized_area),
                    zip(contours, hierarchies[0]),
                ),
            )
        contours = map(
            first,
            starfilter(
                partial(is_likely_redaction, min_area=min_normalized_area),
                zip(contours, hierarchies[0]),
            ),
        )
        return list(contours)
    except:
        return []
cv_analysis/server/pipeline.py (56 lines, new file)
@@ -0,0 +1,56 @@
from dataclasses import asdict
from operator import truth

from funcy import lmap, flatten

from cv_analysis.figure_detection.figure_detection import detect_figures
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.structures import Rectangle
from pdf2img.conversion import convert_pages_to_images
from pdf2img.default_objects.image import ImagePlus, ImageInfo
from pdf2img.default_objects.rectangle import RectanglePlus


def get_analysis_pipeline(operation):
    if operation == "table":
        return make_analysis_pipeline(parse_tables, table_parsing_formatter, dpi=200)
    elif operation == "figure":
        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200)
    else:
        raise


def make_analysis_pipeline(analysis_fn, formatter, dpi):
    def analyse_pipeline(pdf: bytes, index=None):
        def parse_page(page: ImagePlus):
            image = page.asarray()
            rects = analysis_fn(image)
            if not rects:
                return
            infos = formatter(rects, page, dpi)
            return infos

        pages = convert_pages_to_images(pdf, index=index, dpi=dpi)
        results = map(parse_page, pages)

        yield from flatten(filter(truth, results))

    return analyse_pipeline


def table_parsing_formatter(rects, page: ImagePlus, dpi):
    def format_rect(rect: Rectangle):
        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
        return rect_plus.asdict(derotate=True)

    bboxes = lmap(format_rect, rects)

    return {"pageInfo": page.asdict(natural_index=True), "tableCells": bboxes}


def figure_detection_formatter(rects, page, dpi):
    def format_rect(rect: Rectangle):
        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
        return asdict(ImageInfo(page.info, rect_plus.asbbox(derotate=False), rect_plus.alpha))

    return lmap(format_rect, rects)
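A minimal sketch of driving this pipeline (hypothetical file name; assumes a PDF read as raw bytes, which is what analyse_pipeline expects):

```python
from cv_analysis.server.pipeline import get_analysis_pipeline

pipeline = get_analysis_pipeline("table")  # or "figure"

with open("document.pdf", "rb") as f:  # hypothetical input
    pdf_bytes = f.read()

# The pipeline is a generator: pages without detections are dropped and
# the remaining per-page results are flattened into one stream.
for result in pipeline(pdf_bytes):
    print(result)
```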
cv_analysis/table_parsing.py (135 lines, new file)
@@ -0,0 +1,135 @@
import cv2
import numpy as np
from funcy import lmap, lfilter

from cv_analysis.layout_parsing import parse_layout
from cv_analysis.utils.postprocessing import remove_isolated  # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
from cv_analysis.utils.structures import Rectangle
from cv_analysis.utils.visual_logging import vizlogger


def add_external_contours(image, image_h_w_lines_only):

    contours, _ = cv2.findContours(image_h_w_lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(image, (x, y), (x + w, y + h), 255, 1)

    return image


def apply_motion_blur(image: np.array, angle, size=80):
    """Solidifies and slightly extends detected lines.

    Args:
        image (np.array): page image as array
        angle: direction in which to apply blur, 0 or 90
        size (int): kernel size; 80 found empirically to work well

    Returns:
        np.array

    """
    k = np.zeros((size, size), dtype=np.float32)
    vizlogger.debug(k, "tables08_blur_kernel1.png")
    k[(size - 1) // 2, :] = np.ones(size, dtype=np.float32)
    vizlogger.debug(k, "tables09_blur_kernel2.png")
    k = cv2.warpAffine(
        k,
        cv2.getRotationMatrix2D((size / 2 - 0.5, size / 2 - 0.5), angle, 1.0),
        (size, size),
    )
    vizlogger.debug(k, "tables10_blur_kernel3.png")
    k = k * (1.0 / np.sum(k))
    vizlogger.debug(k, "tables11_blur_kernel4.png")
    blurred = cv2.filter2D(image, -1, k)
    return blurred


def isolate_vertical_and_horizontal_components(img_bin):
    """Identifies and reinforces horizontal and vertical lines in a binary image.

    Args:
        img_bin (np.array): array corresponding to single binarized page image
        bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables

    Returns:
        np.array
    """
    line_min_width = 48
    kernel_h = np.ones((1, line_min_width), np.uint8)
    kernel_v = np.ones((line_min_width, 1), np.uint8)

    img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h)
    img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v)
    img_lines_raw = img_bin_v | img_bin_h

    kernel_h = np.ones((1, 30), np.uint8)
    kernel_v = np.ones((30, 1), np.uint8)
    img_bin_h = cv2.dilate(img_bin_h, kernel_h, iterations=2)
    img_bin_v = cv2.dilate(img_bin_v, kernel_v, iterations=2)

    img_bin_h = apply_motion_blur(img_bin_h, 0)
    img_bin_v = apply_motion_blur(img_bin_v, 90)

    img_bin_extended = img_bin_h | img_bin_v

    th1, img_bin_extended = cv2.threshold(img_bin_extended, 120, 255, cv2.THRESH_BINARY)
    img_bin_final = cv2.dilate(img_bin_extended, np.ones((1, 1), np.uint8), iterations=1)
    # add contours before lines are extended by blurring
    img_bin_final = add_external_contours(img_bin_final, img_lines_raw)

    return img_bin_final


def find_table_layout_boxes(image: np.array):
    def is_large_enough(box):
        (x, y, w, h) = box
        if w * h >= 100000:
            return Rectangle.from_xywh(box)

    layout_boxes = parse_layout(image)
    a = lmap(is_large_enough, layout_boxes)
    return lmap(is_large_enough, layout_boxes)


def preprocess(image: np.array):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
    _, image = cv2.threshold(image, 195, 255, cv2.THRESH_BINARY)
    return ~image


def turn_connected_components_into_rects(image: np.array):
    def is_large_enough(stat):
        x1, y1, w, h, area = stat
        return area > 2000 and w > 35 and h > 25

    _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)

    stats = lfilter(is_large_enough, stats)
    if stats:
        stats = np.vstack(stats)
        return stats[:, :-1][2:]
    return []


def parse_tables(image: np.array, show=False):
    """Runs the full table parsing process.

    Args:
        image (np.array): single PDF page, converted to a numpy array

    Returns:
        list: list of rectangles corresponding to table cells
    """

    image = preprocess(image)
    image = isolate_vertical_and_horizontal_components(image)
    rects = turn_connected_components_into_rects(image)
    # print(rects, "\n\n")
    rects = list(map(Rectangle.from_xywh, rects))
    # print(rects, "\n\n")
    rects = remove_isolated(rects)
    # print(rects, "\n\n")

    return rects
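A hedged usage sketch for parse_tables (hypothetical image path; assumes a page scan at roughly 200 DPI, in line with the rest of this diff):

```python
import cv2

from cv_analysis.table_parsing import parse_tables

image = cv2.imread("page.png")  # color input is fine; preprocess() grayscales it
cells = parse_tables(image)     # Rectangle objects, one per detected table cell
for rect in cells:
    print(rect.xywh())
```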
@@ -1,13 +1,13 @@
def make_art():
    art = r"""
__
_ |@@|
__
_ |@@|
/ \ \--/ __ .__ .__
) O|----| | __ ___ __ _____ ____ _____ | | ___.__. _____|__| ______
/ / \ }{ /\ )_ / _\\ \/ / ______ \__ \ / \\__ \ | | | | |/ ___/ |/ ___/
)/ /\__/\ \__O (__ \ / /_____/ / __ \| | \/ __ \| |_\___ |\___ \| |\___ \
|/ (--/\--) \__/ \_/ (______/___|__(______/____/\____/_____/|__/_____/
/ _)( )(_
`---''---`
|/ (--/\--) \__/ \_/ (______/___|__(______/____/\____/_____/|__/_____/
/ _)( )(_
`---''---`
"""
    return art
@@ -1,4 +1,4 @@
from itertools import combinations, product, starmap
from itertools import combinations, starmap, product
from typing import Iterable


@@ -41,12 +41,7 @@ def has_correct_position1(rect_pair):
    return any(
        [
            any(
                [
                    abs(x1 - x2) <= 10,
                    abs(y1 - y2) <= 10,
                    abs(x1 + w1 - (x2 + w2)) <= 10,
                    abs(y1 + h1 - (y2 + h2)) <= 10,
                ]
                [abs(x1 - x2) <= 10, abs(y1 - y2) <= 10, abs(x1 + w1 - (x2 + w2)) <= 10, abs(y1 + h1 - (y2 + h2)) <= 10]
            ),
            any(
                [
@@ -1,13 +1,6 @@
import os

import cv2
from matplotlib import pyplot as plt

# if os.environ.get("USER") == "isaac":
#     import matplotlib

#     matplotlib.use("module://matplotlib-backend-wezterm")


def show_image_cv2(image, maxdim=700):
    h, w, c = image.shape
@@ -4,6 +4,7 @@ from cv_analysis.utils import copy_and_normalize_channels


def draw_contours(image, contours, color=None, annotate=False):

    image = copy_and_normalize_channels(image)

    for cont in contours:
@@ -1,11 +1,12 @@
import pdf2image
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_pdf(pdf, first_page=0, last_page=None):

    first_page += 1
    last_page = None if last_page is None else last_page + 1
@@ -1,28 +1,27 @@
from collections import namedtuple
from functools import partial
from itertools import compress, starmap
from typing import Iterable, List
from itertools import starmap, compress
from typing import Iterable

from cv_analysis.utils.structures import Rectangle


def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def remove_overlapping(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    def overlap(a: Rectangle, rect2: Rectangle) -> float:
        return a.intersection(rect2) > 0

    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> bool:
    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> list:
        return not any(overlap(rect, rect2) for rect2 in rectangles if not rect == rect2)

    rectangles = list(filter(partial(does_not_overlap, rectangles=rectangles), rectangles))
    return rectangles


def remove_included(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def remove_included(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
    return keep


def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    def is_connected(rect: Rectangle, rectangles: Iterable[Rectangle]):
        return any(rect.adjacent(rect2) for rect2 in rectangles if not rect == rect2)

@@ -30,7 +29,7 @@ def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
    return rectangles


def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    def is_connected(left, center, right):
        return any([left.adjacent(center), center.adjacent(right)])

@@ -43,7 +42,7 @@ def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
    return rectangles


def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> List[Rectangle]:
def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> list[Rectangle]:
    return (__remove_isolated_unsorted if input_unsorted else __remove_isolated_sorted)(rectangles)
@@ -1,5 +1,5 @@
import cv2
from numpy import frombuffer, ndarray
import cv2


def preprocess_page_array(page):
@@ -10,6 +10,7 @@ def preprocess_page_array(page):


def page2image(page):

    if type(page) == bytes:
        page = frombuffer(page)
    elif type(page) == ndarray:
@@ -1,23 +1,12 @@
 from json import dumps
-from typing import Iterable

+from typing import Iterable
 import numpy as np
-from funcy import identity  # type: ignore
+from funcy import identity


 class Rectangle:
-    def __init__(
-        self,
-        x1=None,
-        y1=None,
-        w=None,
-        h=None,
-        x2=None,
-        y2=None,
-        indent=4,
-        format="xywh",
-        discrete=True,
-    ):
+    def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh", discrete=True):
         make_discrete = int if discrete else identity

         try:
@@ -122,13 +111,7 @@ class Rectangle:

     @classmethod
     def from_dict_xywh(cls, xywh_dict, discrete=True):
-        return cls(
-            x1=xywh_dict["x"],
-            y1=xywh_dict["y"],
-            w=xywh_dict["width"],
-            h=xywh_dict["height"],
-            discrete=discrete,
-        )
+        return cls(x1=xywh_dict["x"], y1=xywh_dict["y"], w=xywh_dict["width"], h=xywh_dict["height"], discrete=discrete)

     def __str__(self):
         return dumps(self.json(), indent=self.indent)
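The alternate constructor in this hunk maps the dictionary keys x, y, width, and height onto the rectangle's geometry. A small sketch, assuming the package is installed:

```python
from cv_analysis.utils.structures import Rectangle

rect = Rectangle.from_dict_xywh({"x": 55, "y": 247, "width": 503, "height": 351})
print(rect)  # __str__ dumps the rectangle's JSON form with the configured indent
```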
@@ -1,7 +1,5 @@
 from typing import Iterable
-
 import numpy as np
-
 from cv_analysis.utils.structures import Rectangle

@@ -28,6 +26,7 @@ def compute_page_iou(results_boxes: Iterable[Rectangle], ground_truth_boxes: Iterable[Rectangle]):


 def compute_document_score(results_dict, annotation_dict):
     page_weights = np.array([len(page["cells"]) for page in annotation_dict["pages"]])
     page_weights = page_weights / sum(page_weights)
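The two lines above weight each page by its share of annotated cells, so the document score becomes a weighted average of per-page scores. The same arithmetic in isolation:

```python
import numpy as np

cells_per_page = [4, 16, 0]             # e.g. len(page["cells"]) for each page
page_weights = np.array(cells_per_page)
page_weights = page_weights / sum(page_weights)
print(page_weights)                     # [0.2 0.8 0. ]; the weights sum to 1
```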
@@ -1,8 +1,9 @@
-import cv2
 from numpy import generic
+import cv2


 def copy_and_normalize_channels(image):
     image = image.copy()
     try:
         image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
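The try/except around cvtColor normalizes single-channel input to three channels so that later drawing in color cannot fail. The core conversion in isolation, using cv2 and numpy directly:

```python
import cv2
import numpy as np

gray = np.zeros((4, 4), dtype=np.uint8)       # single-channel (grayscale) image
bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)  # gray value replicated to B, G, R
print(gray.shape, bgr.shape)                  # (4, 4) (4, 4, 3)
```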
@@ -1,11 +1,9 @@
 import os

-from pyinfra.config.loader import load_settings  # type: ignore
-
 from cv_analysis.config import get_config
 from cv_analysis.utils.display import save_image

-settings = get_config()
+CV_CONFIG = get_config()


 class VisualLogger:
@@ -41,4 +39,4 @@ class VisualLogger:
         return self.level == "ALL"


-vizlogger = VisualLogger(settings.logging.visual_logging_level, settings.logging.visual_logging_output_folder)
+vizlogger = VisualLogger(CV_CONFIG.visual_logging_level, CV_CONFIG.visual_logging_output_folder)
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large.
Binary file not shown.
@@ -1,30 +0,0 @@
#!/bin/bash
python_version=$1
gitlab_user=$2
gitlab_personal_access_token=$3

# cookiecutter https://gitlab.knecon.com/knecon/research/template-python-project.git --checkout master
# latest_dir=$(ls -td -- */ | head -n 1)  # should be the dir cookiecutter just created
# cd $latest_dir

pyenv install $python_version
pyenv local $python_version
pyenv shell $python_version

pip install --upgrade pip
pip install poetry

poetry config installer.max-workers 10
# research package registry
poetry config repositories.gitlab-research https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
poetry config http-basic.gitlab-research ${gitlab_user} ${gitlab_personal_access_token}
# redactmanager package registry
poetry config repositories.gitlab-red https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
poetry config http-basic.gitlab-red ${gitlab_user} ${gitlab_personal_access_token}

poetry env use $(pyenv which python)
poetry install --with=dev
poetry update

source .venv/bin/activate
@@ -28,4 +28,4 @@ services:
     volumes:
       - /opt/bitnami/rabbitmq/.rabbitmq/:/data/bitnami
 volumes:
-  mdata:
+  mdata:
4 docs/build/html/.buildinfo vendored

@@ -1,4 +0,0 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 04e9c6c5d3e412413c2949e598da60dc
tags: 645f666f9bcd5a90fca523b33c5a78b7
BIN docs/build/html/.doctrees/README.doctree vendored
Binary file not shown.
BIN docs/build/html/.doctrees/environment.pickle vendored
Binary file not shown.
BIN docs/build/html/.doctrees/index.doctree vendored
Binary file not shown.
BIN docs/build/html/.doctrees/modules/serve.doctree vendored
Binary file not shown.
657 docs/build/html/README.html vendored

@@ -1,657 +0,0 @@
[Deleted Sphinx-built HTML page: "cv-analysis - Visual (CV-Based) Document Parsing — CV Analysis Service 2.5.2 documentation" (PyData Sphinx Theme 0.15.2, Sphinx 7.3.7). Theme markup omitted; the page content matches docs/build/html/_sources/README.md.txt below.]
BIN docs/build/html/_images/figure_detection.png vendored
Binary file not shown. (Before: 707 KiB)
BIN docs/build/html/_images/layout_parsing.png vendored
Binary file not shown. (Before: 568 KiB)
BIN docs/build/html/_images/redaction_detection.png vendored
Binary file not shown. (Before: 3.2 MiB)
BIN docs/build/html/_images/table_parsing.png vendored
Binary file not shown. (Before: 566 KiB)
178 docs/build/html/_sources/README.md.txt vendored

@@ -1,178 +0,0 @@
# cv-analysis - Visual (CV-Based) Document Parsing

This repository implements computer-vision-based approaches for detecting and parsing visual features, such as tables or previous redactions, in documents.

## API

Input message:

```json
{
    "targetFilePath": {
        "pdf": "absolute file path",
        "vlp_output": "absolute file path"
    },
    "responseFilePath": "absolute file path",
    "operation": "table_image_inference"
}
```

The response is uploaded to storage at the path specified in the `responseFilePath` field. The structure is as follows:

```json
{
    ...,
    "data": [
        {
            "pageNum": 0,
            "bbox": {
                "x1": 55.3407,
                "y1": 247.0246,
                "x2": 558.5602,
                "y2": 598.0585
            },
            "uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84",
            "label": "table",
            "tableLines": [
                {
                    "x1": 0,
                    "y1": 16,
                    "x2": 1399,
                    "y2": 16
                },
                ...
            ],
            "imageInfo": {
                "height": 693,
                "width": 1414
            }
        },
        ...
    ]
}
```

## Installation

```bash
git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git
cd cv-analysis

python -m venv env
source env/bin/activate

pip install -e .
pip install -r requirements.txt

dvc pull
```

## Usage

### As an API

The module provides functions for the individual tasks; each returns some kind of collection of points, depending on the specific task.

#### Redaction Detection (API)

The snippet below shows how to find the outlines of previous redactions.

```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import numpy as np

pdf_path = ...
page_index = ...

page = pdf2image.convert_from_path(pdf_path, first_page=page_index, last_page=page_index)[0]
page = np.array(page)

redaction_contours = find_redactions(page)
```

## As a CLI Tool

Core API functionality can also be used through a CLI.

### Table Parsing

The table parsing utility detects tables and segments them into individual cells.

```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type table
```

The image below shows a parsed table in which each cell has been detected individually.



### Redaction Detection (CLI)

The redaction detection utility detects previous redactions (filled black rectangles) in PDFs.

```bash
python scripts/annotate.py data/test_pdf.pdf 2 --type redaction
```

The image below shows the detected redactions with green outlines.



### Layout Parsing

The layout parsing utility detects elements such as paragraphs, tables, and figures.

```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type layout
```

The image below shows the detected layout elements on a page.



### Figure Detection

The figure detection utility specifically targets figures, which can be missed by the generic layout parsing utility.

```bash
python scripts/annotate.py data/test_pdf.pdf 3 --type figure
```

The image below shows the detected figure on a page.



## Running as a service

### Building

Build the base image:

```bash
bash setup/docker.sh
```

Build the head image:

```bash
docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT=""
```

### Usage (service)

Shell 1:

```bash
docker run --rm --net=host cv-analysis
```

Shell 2:

```bash
python scripts/client_mock.py --pdf_path /path/to/a/pdf
```
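The deleted README above still documents the service's message contract, so a minimal client sketch may be useful. The field names come from the README; the paths, the storage layout, and how messages reach the service are assumptions and deployment-specific:

```python
import json

# Hypothetical absolute paths on the shared storage.
message = {
    "targetFilePath": {
        "pdf": "/data/in/document.pdf",
        "vlp_output": "/data/in/document_vlp.json",
    },
    "responseFilePath": "/data/out/document_tables.json",
    "operation": "table_image_inference",
}
print(json.dumps(message, indent=4))

# Once the service has written its result to responseFilePath:
# with open(message["responseFilePath"]) as f:
#     response = json.load(f)
# for item in response["data"]:
#     print(item["pageNum"], item["label"], item["bbox"])
```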
37 docs/build/html/_sources/index.rst.txt vendored

@@ -1,37 +0,0 @@
.. Keyword Extraction Service documentation master file, created by
   sphinx-quickstart on Mon Sep 12 12:04:24 2022.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

=============================================
Welcome to CV Analysis Service documentation!
=============================================

.. note::

   If you'd like to change the looks of things 👉 https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html


Table of Contents
-----------------

.. toctree::
   :maxdepth: 3
   :caption: README

   README.md

.. toctree::
   :maxdepth: 3
   :caption: Modules

   modules/cv_analysis
   modules/serve


Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
@@ -1,7 +0,0 @@
cv\_analysis.config module
==========================

.. automodule:: cv_analysis.config
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.figure\_detection module
=======================================================

.. automodule:: cv_analysis.figure_detection.figure_detection
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.figures module
=============================================

.. automodule:: cv_analysis.figure_detection.figures
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,17 +0,0 @@
cv\_analysis.figure\_detection package
======================================

.. automodule:: cv_analysis.figure_detection
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   cv_analysis.figure_detection.figure_detection
   cv_analysis.figure_detection.figures
   cv_analysis.figure_detection.text
@@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.text module
==========================================

.. automodule:: cv_analysis.figure_detection.text
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.layout\_parsing module
===================================

.. automodule:: cv_analysis.layout_parsing
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.locations module
=============================

.. automodule:: cv_analysis.locations
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.redaction\_detection module
========================================

.. automodule:: cv_analysis.redaction_detection
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,30 +0,0 @@
cv\_analysis package
====================

.. automodule:: cv_analysis
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   cv_analysis.figure_detection
   cv_analysis.server
   cv_analysis.utils

Submodules
----------

.. toctree::
   :maxdepth: 4

   cv_analysis.config
   cv_analysis.layout_parsing
   cv_analysis.locations
   cv_analysis.redaction_detection
   cv_analysis.table_inference
   cv_analysis.table_parsing
Some files were not shown because too many files have changed in this diff.