Compare commits

..

No commits in common. "master" and "2.3.0" have entirely different histories.

270 changed files with 2355 additions and 157325 deletions

View File

@ -10,7 +10,7 @@ omit =
*/build_venv/*
*/incl/*
source =
cv_analysis
cv_analysis
relative_files = True
data_file = .coverage
@ -46,4 +46,4 @@ ignore_errors = True
directory = reports
[xml]
output = reports/coverage.xml
output = reports/coverage.xml

View File

@ -97,4 +97,4 @@ target/
*.swp
*/*.swp
*/*/*.swp
*/*/*/*.swp
*/*/*/*.swp

View File

@ -5,6 +5,3 @@
port = 22
['remote "azure_remote"']
url = azure://cv-sa-dvc/
connection_string = "DefaultEndpointsProtocol=https;AccountName=cvsacricket;AccountKey=KOuTAQ6Mp00ePTT5ObYmgaHlxwS1qukY4QU4Kuk7gy/vldneA+ZiKjaOpEFtqKA6Mtym2gQz8THy+ASts/Y1Bw==;EndpointSuffix=core.windows.net"
['remote "local"']
url = ../dvc_local_remote

3
.gitignore vendored
View File

@ -48,5 +48,4 @@ __pycache__/
!data/*
!drivers
# unignore files
!bom.*
# ignore files

View File

@ -1,30 +0,0 @@
include:
- project: "Gitlab/gitlab"
ref: 0.3.0
file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"
variables:
NEXUS_PROJECT_DIR: red
IMAGENAME: "${CI_PROJECT_NAME}"
#################################
# temp. disable integration tests, b/c they don't cover the CV analysis case yet
trigger integration tests:
rules:
- when: never
release build:
stage: release
needs:
- job: set custom version
artifacts: true
optional: true
- job: calculate patch version
artifacts: true
optional: true
- job: calculate minor version
artifacts: true
optional: true
- job: build docker nexus
artifacts: true
#################################

View File

@ -1,35 +1,30 @@
# CI for services, check gitlab repo for python package CI
include:
- project: "Gitlab/gitlab"
ref: main
file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
- project: "Gitlab/gitlab"
ref: main
file: "/ci-templates/research/docs.gitlab-ci.yml"
ref: 0.3.0
file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"
# set project variables here
variables:
NEXUS_PROJECT_DIR: red # subfolder in Nexus docker-gin where your container will be stored
IMAGENAME: $CI_PROJECT_NAME # if the project URL is gitlab.example.com/group-name/project-1, CI_PROJECT_NAME is project-1
NEXUS_PROJECT_DIR: red
IMAGENAME: "${CI_PROJECT_NAME}"
pages:
only:
- master # KEEP THIS, necessary because `master` branch and not `main` branch
###################
# INTEGRATION TESTS
trigger-integration-tests:
extends: .integration-tests
# ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
# needs:
# - job: docker-build::model_name
# artifacts: true
#################################
# temp. disable integration tests, b/c they don't cover the CV analysis case yet
trigger integration tests:
rules:
- when: never
#########
# RELEASE
release:
extends: .release
release build:
stage: release
needs:
- !reference [.needs-versioning, needs] # leave this line as is
- job: set custom version
artifacts: true
optional: true
- job: calculate patch version
artifacts: true
optional: true
- job: calculate minor version
artifacts: true
optional: true
- job: build docker nexus
artifacts: true
#################################

View File

@ -1,61 +0,0 @@
import subprocess
import sys
from pathlib import Path
import semver
from loguru import logger
from semver.version import Version
logger.remove()
logger.add(sys.stdout, level="INFO")
def bashcmd(cmds: list) -> str:
try:
logger.debug(f"running: {' '.join(cmds)}")
return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
except:
logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
raise
def get_highest_existing_git_version_tag() -> str:
"""Get highest versions from git tags depending on bump level"""
try:
git_tags = bashcmd(["git", "tag", "-l"]).split()
semver_compat_tags = list(filter(Version.is_valid, git_tags))
highest_git_version_tag = max(semver_compat_tags, key=semver.version.Version.parse)
logger.info(f"Highest git version tag: {highest_git_version_tag}")
return highest_git_version_tag
except:
logger.warning("Error getting git version tags")
raise
def auto_bump_version() -> bool:
active = Path(".autoversion").is_file()
logger.debug(f"Automated version bump is set to '{active}'")
return active
def main() -> None:
poetry_project_version = bashcmd(["poetry", "version", "-s"])
logger.info(f"Poetry project version: {poetry_project_version}")
highest_git_version_tag = get_highest_existing_git_version_tag()
comparison_result = semver.compare(poetry_project_version, highest_git_version_tag)
if comparison_result in (-1, 0):
logger.warning("Poetry version must be greater than git tag version.")
if auto_bump_version():
logger.info(bashcmd(["poetry", "version", highest_git_version_tag]))
sys.exit(0)
sys.exit(1)
else:
logger.info(f"All good: {poetry_project_version} > {highest_git_version_tag}")
if __name__ == "__main__":
main()

View File

@ -1,72 +0,0 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
exclude: ^(docs/|notebooks/|data/|src/configs/|tests/|.hooks/|bom.json)
default_language_version:
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
args: [--unsafe] # needed for .gitlab-ci.yml
- id: check-toml
- id: detect-private-key
- id: check-added-large-files
args: ['--maxkb=10000']
- id: check-case-conflict
- id: mixed-line-ending
# - repo: https://github.com/pre-commit/mirrors-pylint
# rev: v3.0.0a5
# hooks:
# - id: pylint
# args:
# - --disable=C0111,R0903,E0401
# - --max-line-length=120
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
- id: isort
args:
- --profile black
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
- id: black
# exclude: ^(docs/|notebooks/|data/|src/secrets/)
args:
- --line-length=120
- repo: https://github.com/compilerla/conventional-pre-commit
rev: v4.0.0
hooks:
- id: conventional-pre-commit
pass_filenames: false
stages: [commit-msg]
# args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test]
- repo: local
hooks:
- id: version-checker
name: version-checker
entry: python .hooks/poetry_version_check.py
language: python
always_run: true
additional_dependencies:
- "semver"
- "loguru"
# - repo: local
# hooks:
# - id: docker-build-test
# name: testing docker build
# entry: ./scripts/ops/docker-compose-build-run.sh
# language: script
# # always_run: true
# pass_filenames: false
# args: []
# stages: [pre-commit]

View File

@ -1,19 +1,11 @@
###############
# BUILDER IMAGE
FROM python:3.10-slim as builder
ARG GITLAB_USER
ARG GITLAB_ACCESS_TOKEN
FROM python:3.10
ARG USERNAME
ARG TOKEN
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
ARG POETRY_SOURCE_REF_RED=gitlab-red
ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
ARG VERSION=dev
LABEL maintainer="Research <research@knecon.com>"
@ -21,58 +13,29 @@ LABEL version="${VERSION}"
WORKDIR /app
###########
# ENV SETUP
ENV PYTHONDONTWRITEBYTECODE=true
ENV PYTHONUNBUFFERED=true
ENV POETRY_HOME=/opt/poetry
ENV PATH="$POETRY_HOME/bin:$PATH"
RUN apt-get update && \
apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN curl -sSL https://install.python-poetry.org | python3 -
RUN poetry --version
COPY ./src ./src
COPY ./config ./config
COPY pyproject.toml poetry.lock ./
RUN poetry config virtualenvs.create true && \
poetry config virtualenvs.in-project true && \
RUN ls -hal
RUN echo "${USERNAME} ${TOKEN} ${PYPI_REGISTRY_RED} ${POETRY_SOURCE_REF_RED} "
RUN poetry config virtualenvs.create false && \
poetry config installer.max-workers 10 && \
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${USERNAME} ${TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
poetry install --without=dev,docs,test -vv --no-interaction --no-root
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${USERNAME} ${TOKEN} && \
poetry install --without=test -vv --no-interaction
##################
# COPY SOURCE CODE
COPY ./config ./config
COPY ./src ./src
###############
# WORKING IMAGE
FROM python:3.10-slim
# COPY BILL OF MATERIALS (BOM)
COPY bom.json /bom.json
# COPY SOURCE CODE FROM BUILDER IMAGE
COPY --from=builder /app /app
WORKDIR /app
ENV PATH="/app/.venv/bin:$PATH"
############
# NETWORKING
EXPOSE 5000
EXPOSE 8080
################
# LAUNCH COMMAND
CMD [ "python", "src/serve.py"]

View File

@ -1,94 +0,0 @@
.PHONY: \
poetry in-project-venv dev-env use-env install install-dev tests \
update-version sync-version-with-git \
docker docker-build-run docker-build docker-run \
docker-rm docker-rm-container docker-rm-image \
pre-commit get-licenses prep-commit \
docs sphinx_html sphinx_apidoc bom
.DEFAULT_GOAL := run
export DOCKER=docker
export DOCKERFILE=Dockerfile
export IMAGE_NAME=cv_analysis_service-image
export CONTAINER_NAME=cv_analysis_service-container
export HOST_PORT=9999
export CONTAINER_PORT=9999
export PYTHON_VERSION=python3.10
# all commands should be executed in the root dir or the project,
# specific environments should be deactivated
poetry: in-project-venv use-env dev-env
in-project-venv:
poetry config virtualenvs.in-project true
use-env:
poetry env use ${PYTHON_VERSION}
dev-env:
poetry install --with dev && poetry update
install:
poetry add $(pkg)
install-dev:
poetry add --dev $(pkg)
requirements:
poetry export --without-hashes --output requirements.txt
update-version:
poetry version prerelease
sync-version-with-git:
git pull -p && poetry version $(git rev-list --tags --max-count=1 | git describe --tags --abbrev=0)
bom:
cyclonedx-py poetry -o bom.json
docker: docker-rm docker-build-run
docker-build-run: docker-build docker-run
docker-build:
$(DOCKER) build \
--no-cache --progress=plain \
-t $(IMAGE_NAME) -f $(DOCKERFILE) \
--build-arg USERNAME=${USERNAME} \
--build-arg TOKEN=${GITLAB_TOKEN} \
.
docker-run:
$(DOCKER) run -it --rm -p $(HOST_PORT):$(CONTAINER_PORT)/tcp --name $(CONTAINER_NAME) $(IMAGE_NAME)
docker-rm: docker-rm-container docker-rm-image
docker-rm-container:
-$(DOCKER) rm $(CONTAINER_NAME)
docker-rm-image:
-$(DOCKER) image rm $(IMAGE_NAME)
tests:
poetry run pytest ./tests
prep-commit:
docs get-license sync-version-with-git update-version pre-commit
pre-commit:
pre-commit run --all-files
get-licenses:
pip-licenses --format=json --order=license --with-urls > pkg-licenses.json
docs: sphinx_apidoc sphinx_html
sphinx_html:
poetry run sphinx-build -b html docs/source/ docs/build/html -E -a
sphinx_apidoc:
cp ./README.md ./docs/source/README.md && cp -r ./data ./docs/source/data/ && poetry run sphinx-apidoc ./src -o ./docs/source/modules --no-toc --module-first --follow-links --separate --force
bom:
cyclonedx-py poetry -o bom.json

View File

@ -1,60 +1,8 @@
# cv-analysis - Visual (CV-Based) Document Parsing
# cv-analysis &mdash; Visual (CV-Based) Document Parsing
parse_pdf()
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.
## API
Input message:
```json
{
"targetFilePath": {
"pdf": "absolute file path",
"vlp_output": "absolute file path"
},
"responseFilePath": "absolute file path",
"operation": "table_image_inference"
}
```
Response is uploaded to the storage as specified in the `responseFilePath` field. The structure is as follows:
```json
{
...,
"data": [
{
'pageNum': 0,
'bbox': {
'x1': 55.3407,
'y1': 247.0246,
'x2': 558.5602,
'y2': 598.0585
},
'uuid': '2b10c1a2-393c-4fca-b9e3-0ad5b774ac84',
'label': 'table',
'tableLines': [
{
'x1': 0,
'y1': 16,
'x2': 1399,
'y2': 16
},
...
],
'imageInfo': {
'height': 693,
'width': 1414
}
},
...
]
}
```
## Installation
```bash
@ -83,9 +31,10 @@ The below snippet shows hot to find the outlines of previous redactions.
```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import pdf2image
import numpy as np
pdf_path = ...
page_index = ...

30096
bom.json

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +1,11 @@
[asyncio]
max_concurrent_tasks = 10
[dynamic_tenant_queues]
enabled = true
[metrics.prometheus]
enabled = true
prefix = "redactmanager_cv_analysis_service"
[tracing]
enabled = true
# possible values "opentelemetry" | "azure_monitor" (Excpects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
type = "azure_monitor"
[tracing.opentelemetry]
enabled = true
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
service_name = "redactmanager_cv_analysis_service"
service_name = "redactmanager_cv_analyisis_service"
exporter = "otlp"
[webserver]
@ -36,15 +25,6 @@ input_queue = "request_queue"
output_queue = "response_queue"
dead_letter_queue = "dead_letter_queue"
tenant_event_queue_suffix = "_tenant_event_queue"
tenant_event_dlq_suffix = "_tenant_events_dlq"
tenant_exchange_name = "tenants-exchange"
queue_expiration_time = 300000 # 5 minutes in milliseconds
service_request_queue_prefix = "cv_analysis_request_queue"
service_request_exchange_name = "cv_analysis_request_exchange"
service_response_exchange_name = "cv_analysis_response_exchange"
service_dlq_name = "cv_analysis_dlq"
[storage]
backend = "s3"
@ -61,7 +41,4 @@ connection_string = ""
[storage.tenant_server]
public_key = ""
endpoint = "http://tenant-user-management:8081/internal-api/tenants"
[kubernetes]
pod_name = "test_pod"
endpoint = "http://tenant-user-management:8081/internal-api/tenants"

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -28,4 +28,4 @@ services:
volumes:
- /opt/bitnami/rabbitmq/.rabbitmq/:/data/bitnami
volumes:
mdata:
mdata:

View File

@ -1,4 +0,0 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 04e9c6c5d3e412413c2949e598da60dc
tags: 645f666f9bcd5a90fca523b33c5a78b7

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,657 +0,0 @@
<!DOCTYPE html>
<html lang="en" data-content_root="./" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>cv-analysis - Visual (CV-Based) Document Parsing &#8212; CV Analysis Service 2.5.2 documentation</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link href="_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=a746c00c" />
<link rel="stylesheet" type="text/css" href="https://assets.readthedocs.org/static/css/badge_only.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
<script src="_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
<script src="_static/documentation_options.js?v=afc61bbc"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'README';</script>
<script async="async" src="https://assets.readthedocs.org/static/javascript/readthedocs-doc-embed.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="cv_analysis package" href="modules/cv_analysis.html" />
<link rel="prev" title="Welcome to CV Analysis Service documentation!" href="index.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<!-- RTD Extra Head -->
<link rel="stylesheet" href="https://assets.readthedocs.org/static/css/readthedocs-doc-embed.css" type="text/css" />
<script type="application/json" id="READTHEDOCS_DATA">{"ad_free": "", "api_host": "", "builder": "sphinx", "canonical_url": "", "docroot": "", "features": {"docsearch_disabled": false}, "global_analytics_code": null, "language": "", "page": "README", "programming_language": "", "project": "", "source_suffix": ".md", "subprojects": {}, "theme": "", "user_analytics_code": null, "version": ""}</script>
<!--
Using this variable directly instead of using `JSON.parse` is deprecated.
The READTHEDOCS_DATA global variable will be removed in the future.
-->
<script type="text/javascript">
READTHEDOCS_DATA = JSON.parse(document.getElementById('READTHEDOCS_DATA').innerHTML);
</script>
<script type="text/javascript" src="https://assets.readthedocs.org/static/javascript/readthedocs-analytics.js" async="async"></script>
<!-- end RTD <extrahead> -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<header class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="index.html">
<img src="_static/logo.png" class="logo__image only-light" alt="CV Analysis Service 2.5.2 documentation - Home"/>
<script>document.write(`<img src="_static/logo.png" class="logo__image only-dark" alt="CV Analysis Service 2.5.2 documentation - Home"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
cv-analysis - Visual (CV-Based) Document Parsing
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
cv_analysis package
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/serve.html">
serve module
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
cv-analysis - Visual (CV-Based) Document Parsing
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
cv_analysis package
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="modules/serve.html">
serve module
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"></div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item active" aria-current="page">cv-analysis...</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<section id="cv-analysis-visual-cv-based-document-parsing">
<h1>cv-analysis - Visual (CV-Based) Document Parsing<a class="headerlink" href="#cv-analysis-visual-cv-based-document-parsing" title="Link to this heading">#</a></h1>
<p>parse_pdf()
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.</p>
<section id="api">
<h2>API<a class="headerlink" href="#api" title="Link to this heading">#</a></h2>
<p>Input message:</p>
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;targetFilePath&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="nt">&quot;pdf&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;absolute file path&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;vlp_output&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;absolute file path&quot;</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="nt">&quot;responseFilePath&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;absolute file path&quot;</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;operation&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;table_image_inference&quot;</span>
<span class="p">}</span>
</pre></div>
</div>
<p>Response is uploaded to the storage as specified in the <code class="docutils literal notranslate"><span class="pre">responseFilePath</span></code> field. The structure is as follows:</p>
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
<span class="w"> </span><span class="err">...</span><span class="p">,</span>
<span class="w"> </span><span class="nt">&quot;data&quot;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;pageNum&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;bbox&#39;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">55.3407</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">247.0246</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">558.5602</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mf">598.0585</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="err">&#39;uuid&#39;</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="mi">2</span><span class="err">b</span><span class="mi">10</span><span class="err">c</span><span class="mi">1</span><span class="err">a</span><span class="mi">2-393</span><span class="err">c</span><span class="mi">-4</span><span class="kc">f</span><span class="err">ca</span><span class="mi">-</span><span class="err">b</span><span class="mf">9e3-0</span><span class="err">ad</span><span class="mi">5</span><span class="err">b</span><span class="mi">774</span><span class="err">ac</span><span class="mi">84</span><span class="err">&#39;</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;label&#39;</span><span class="p">:</span><span class="w"> </span><span class="err">&#39;</span><span class="kc">ta</span><span class="err">ble&#39;</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;</span><span class="kc">ta</span><span class="err">bleLi</span><span class="kc">nes</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
<span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">1</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">16</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;x</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">1399</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;y</span><span class="mi">2</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">16</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="err">...</span>
<span class="w"> </span><span class="p">],</span>
<span class="w"> </span><span class="err">&#39;imageI</span><span class="kc">nf</span><span class="err">o&#39;</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="err">&#39;heigh</span><span class="kc">t</span><span class="err">&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">693</span><span class="p">,</span>
<span class="w"> </span><span class="err">&#39;wid</span><span class="kc">t</span><span class="err">h&#39;</span><span class="p">:</span><span class="w"> </span><span class="mi">1414</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">},</span>
<span class="w"> </span><span class="err">...</span>
<span class="w"> </span><span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
</section>
<section id="installation">
<h2>Installation<a class="headerlink" href="#installation" title="Link to this heading">#</a></h2>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>git<span class="w"> </span>clone<span class="w"> </span>ssh://git@git.iqser.com:2222/rr/cv-analysis.git
<span class="nb">cd</span><span class="w"> </span>cv-analysis
python<span class="w"> </span>-m<span class="w"> </span>venv<span class="w"> </span>env
<span class="nb">source</span><span class="w"> </span>env/bin/activate
pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span>.
pip<span class="w"> </span>install<span class="w"> </span>-r<span class="w"> </span>requirements.txt
dvc<span class="w"> </span>pull
</pre></div>
</div>
</section>
<section id="usage">
<h2>Usage<a class="headerlink" href="#usage" title="Link to this heading">#</a></h2>
<section id="as-an-api">
<h3>As an API<a class="headerlink" href="#as-an-api" title="Link to this heading">#</a></h3>
<p>The module provided functions for the individual tasks that all return some kind of collection of points, depending on
the specific task.</p>
<section id="redaction-detection-api">
<h4>Redaction Detection (API)<a class="headerlink" href="#redaction-detection-api" title="Link to this heading">#</a></h4>
<p>The below snippet shows hot to find the outlines of previous redactions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">cv_analysis.redaction_detection</span> <span class="kn">import</span> <span class="n">find_redactions</span>
<span class="kn">import</span> <span class="nn">pdf2image</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="n">pdf_path</span> <span class="o">=</span> <span class="o">...</span>
<span class="n">page_index</span> <span class="o">=</span> <span class="o">...</span>
<span class="n">page</span> <span class="o">=</span> <span class="n">pdf2image</span><span class="o">.</span><span class="n">convert_from_path</span><span class="p">(</span><span class="n">pdf_path</span><span class="p">,</span> <span class="n">first_page</span><span class="o">=</span><span class="n">page_index</span><span class="p">,</span> <span class="n">last_page</span><span class="o">=</span><span class="n">page_index</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">page</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">page</span><span class="p">)</span>
<span class="n">redaction_contours</span> <span class="o">=</span> <span class="n">find_redactions</span><span class="p">(</span><span class="n">page</span><span class="p">)</span>
</pre></div>
</div>
</section>
</section>
</section>
<section id="as-a-cli-tool">
<h2>As a CLI Tool<a class="headerlink" href="#as-a-cli-tool" title="Link to this heading">#</a></h2>
<p>Core API functionalities can be used through a CLI.</p>
<section id="table-parsing">
<h3>Table Parsing<a class="headerlink" href="#table-parsing" title="Link to this heading">#</a></h3>
<p>The tables parsing utility detects and segments tables into individual cells.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">7</span><span class="w"> </span>--type<span class="w"> </span>table
</pre></div>
</div>
<p>The below image shows a parsed table, where each table cell has been detected individually.</p>
<p><img alt="Table Parsing Demonstration" src="_images/table_parsing.png" /></p>
</section>
<section id="redaction-detection-cli">
<h3>Redaction Detection (CLI)<a class="headerlink" href="#redaction-detection-cli" title="Link to this heading">#</a></h3>
<p>The redaction detection utility detects previous redactions in PDFs (filled black rectangles).</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">2</span><span class="w"> </span>--type<span class="w"> </span>redaction
</pre></div>
</div>
<p>The below image shows the detected redactions with green outlines.</p>
<p><img alt="Redaction Detection Demonstration" src="_images/redaction_detection.png" /></p>
</section>
<section id="layout-parsing">
<h3>Layout Parsing<a class="headerlink" href="#layout-parsing" title="Link to this heading">#</a></h3>
<p>The layout parsing utility detects elements such as paragraphs, tables and figures.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">7</span><span class="w"> </span>--type<span class="w"> </span>layout
</pre></div>
</div>
<p>The below image shows the detected layout elements on a page.</p>
<p><img alt="Layout Parsing Demonstration" src="_images/layout_parsing.png" /></p>
</section>
<section id="figure-detection">
<h3>Figure Detection<a class="headerlink" href="#figure-detection" title="Link to this heading">#</a></h3>
<p>The figure detection utility detects figures specifically, which can be missed by the generic layout parsing utility.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">3</span><span class="w"> </span>--type<span class="w"> </span>figure
</pre></div>
</div>
<p>The below image shows the detected figure on a page.</p>
<p><img alt="Figure Detection Demonstration" src="_images/figure_detection.png" /></p>
</section>
</section>
<section id="running-as-a-service">
<h2>Running as a service<a class="headerlink" href="#running-as-a-service" title="Link to this heading">#</a></h2>
<section id="building">
<h3>Building<a class="headerlink" href="#building" title="Link to this heading">#</a></h3>
<p>Build base image</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>bash<span class="w"> </span>setup/docker.sh
</pre></div>
</div>
<p>Build head image</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>Dockerfile<span class="w"> </span>-t<span class="w"> </span>cv-analysis<span class="w"> </span>.<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">BASE_ROOT</span><span class="o">=</span><span class="s2">&quot;&quot;</span>
</pre></div>
</div>
</section>
<section id="usage-service">
<h3>Usage (service)<a class="headerlink" href="#usage-service" title="Link to this heading">#</a></h3>
<p>Shell 1</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>run<span class="w"> </span>--rm<span class="w"> </span>--net<span class="o">=</span>host<span class="w"> </span>--rm<span class="w"> </span>cv-analysis
</pre></div>
</div>
<p>Shell 2</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/client_mock.py<span class="w"> </span>--pdf_path<span class="w"> </span>/path/to/a/pdf
</pre></div>
</div>
</section>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="index.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Welcome to CV Analysis Service documentation!</p>
</div>
</a>
<a class="right-next"
href="modules/cv_analysis.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">cv_analysis package</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div
id="pst-page-navigation-heading-2"
class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#api">API</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#installation">Installation</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#usage">Usage</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#as-an-api">As an API</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#redaction-detection-api">Redaction Detection (API)</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#as-a-cli-tool">As a CLI Tool</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#table-parsing">Table Parsing</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#redaction-detection-cli">Redaction Detection (CLI)</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#layout-parsing">Layout Parsing</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#figure-detection">Figure Detection</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#running-as-a-service">Running as a service</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#building">Building</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#usage-service">Usage (service)</a></li>
</ul>
</li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection sourcelink">
<a href="_sources/README.md.txt">
<i class="fa-solid fa-file-lines"></i> Show Source
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
<script src="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright All rights reserved.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.3.7.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
</p></div>
</div>
</div>
</footer>
</body>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 707 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 568 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 566 KiB

View File

@ -1,178 +0,0 @@
# cv-analysis - Visual (CV-Based) Document Parsing
parse_pdf()
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.
## API
Input message:
```json
{
"targetFilePath": {
"pdf": "absolute file path",
"vlp_output": "absolute file path"
},
"responseFilePath": "absolute file path",
"operation": "table_image_inference"
}
```
Response is uploaded to the storage as specified in the `responseFilePath` field. The structure is as follows:
```json
{
...,
"data": [
{
'pageNum': 0,
'bbox': {
'x1': 55.3407,
'y1': 247.0246,
'x2': 558.5602,
'y2': 598.0585
},
'uuid': '2b10c1a2-393c-4fca-b9e3-0ad5b774ac84',
'label': 'table',
'tableLines': [
{
'x1': 0,
'y1': 16,
'x2': 1399,
'y2': 16
},
...
],
'imageInfo': {
'height': 693,
'width': 1414
}
},
...
]
}
```
## Installation
```bash
git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git
cd cv-analysis
python -m venv env
source env/bin/activate
pip install -e .
pip install -r requirements.txt
dvc pull
```
## Usage
### As an API
The module provided functions for the individual tasks that all return some kind of collection of points, depending on
the specific task.
#### Redaction Detection (API)
The below snippet shows hot to find the outlines of previous redactions.
```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import numpy as np
pdf_path = ...
page_index = ...
page = pdf2image.convert_from_path(pdf_path, first_page=page_index, last_page=page_index)[0]
page = np.array(page)
redaction_contours = find_redactions(page)
```
## As a CLI Tool
Core API functionalities can be used through a CLI.
### Table Parsing
The tables parsing utility detects and segments tables into individual cells.
```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type table
```
The below image shows a parsed table, where each table cell has been detected individually.
![Table Parsing Demonstration](data/table_parsing.png)
### Redaction Detection (CLI)
The redaction detection utility detects previous redactions in PDFs (filled black rectangles).
```bash
python scripts/annotate.py data/test_pdf.pdf 2 --type redaction
```
The below image shows the detected redactions with green outlines.
![Redaction Detection Demonstration](data/redaction_detection.png)
### Layout Parsing
The layout parsing utility detects elements such as paragraphs, tables and figures.
```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type layout
```
The below image shows the detected layout elements on a page.
![Layout Parsing Demonstration](data/layout_parsing.png)
### Figure Detection
The figure detection utility detects figures specifically, which can be missed by the generic layout parsing utility.
```bash
python scripts/annotate.py data/test_pdf.pdf 3 --type figure
```
The below image shows the detected figure on a page.
![Figure Detection Demonstration](data/figure_detection.png)
## Running as a service
### Building
Build base image
```bash
bash setup/docker.sh
```
Build head image
```bash
docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT=""
```
### Usage (service)
Shell 1
```bash
docker run --rm --net=host --rm cv-analysis
```
Shell 2
```bash
python scripts/client_mock.py --pdf_path /path/to/a/pdf
```

View File

@ -1,37 +0,0 @@
.. Keyword Extraction Service documentation master file, created by
sphinx-quickstart on Mon Sep 12 12:04:24 2022.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
=============================================
Welcome to CV Analysis Service documentation!
=============================================
.. note::
If you'd like to change the looks of things 👉 https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
Table of Contents
-----------------
.. toctree::
:maxdepth: 3
:caption: README
README.md
.. toctree::
:maxdepth: 3
:caption: Modules
modules/cv_analysis
modules/serve
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -1,7 +0,0 @@
cv\_analysis.config module
==========================
.. automodule:: cv_analysis.config
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.figure\_detection module
=======================================================
.. automodule:: cv_analysis.figure_detection.figure_detection
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.figures module
=============================================
.. automodule:: cv_analysis.figure_detection.figures
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,17 +0,0 @@
cv\_analysis.figure\_detection package
======================================
.. automodule:: cv_analysis.figure_detection
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. toctree::
:maxdepth: 4
cv_analysis.figure_detection.figure_detection
cv_analysis.figure_detection.figures
cv_analysis.figure_detection.text

View File

@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.text module
==========================================
.. automodule:: cv_analysis.figure_detection.text
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.layout\_parsing module
===================================
.. automodule:: cv_analysis.layout_parsing
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.locations module
=============================
.. automodule:: cv_analysis.locations
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.redaction\_detection module
========================================
.. automodule:: cv_analysis.redaction_detection
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,30 +0,0 @@
cv\_analysis package
====================
.. automodule:: cv_analysis
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
:maxdepth: 4
cv_analysis.figure_detection
cv_analysis.server
cv_analysis.utils
Submodules
----------
.. toctree::
:maxdepth: 4
cv_analysis.config
cv_analysis.layout_parsing
cv_analysis.locations
cv_analysis.redaction_detection
cv_analysis.table_inference
cv_analysis.table_parsing

View File

@ -1,7 +0,0 @@
cv\_analysis.server.pipeline module
===================================
.. automodule:: cv_analysis.server.pipeline
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,15 +0,0 @@
cv\_analysis.server package
===========================
.. automodule:: cv_analysis.server
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. toctree::
:maxdepth: 4
cv_analysis.server.pipeline

View File

@ -1,7 +0,0 @@
cv\_analysis.table\_inference module
====================================
.. automodule:: cv_analysis.table_inference
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.table\_parsing module
==================================
.. automodule:: cv_analysis.table_parsing
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.annotate module
==================================
.. automodule:: cv_analysis.utils.annotate
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.banner module
================================
.. automodule:: cv_analysis.utils.banner
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.connect\_rects module
========================================
.. automodule:: cv_analysis.utils.connect_rects
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.display module
=================================
.. automodule:: cv_analysis.utils.display
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.draw module
==============================
.. automodule:: cv_analysis.utils.draw
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.filters module
=================================
.. automodule:: cv_analysis.utils.filters
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.image\_extraction module
===========================================
.. automodule:: cv_analysis.utils.image_extraction
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.open\_pdf module
===================================
.. automodule:: cv_analysis.utils.open_pdf
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.postprocessing module
========================================
.. automodule:: cv_analysis.utils.postprocessing
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.preprocessing module
=======================================
.. automodule:: cv_analysis.utils.preprocessing
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,28 +0,0 @@
cv\_analysis.utils package
==========================
.. automodule:: cv_analysis.utils
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. toctree::
:maxdepth: 4
cv_analysis.utils.annotate
cv_analysis.utils.banner
cv_analysis.utils.connect_rects
cv_analysis.utils.display
cv_analysis.utils.draw
cv_analysis.utils.filters
cv_analysis.utils.image_extraction
cv_analysis.utils.open_pdf
cv_analysis.utils.postprocessing
cv_analysis.utils.preprocessing
cv_analysis.utils.structures
cv_analysis.utils.test_metrics
cv_analysis.utils.utils
cv_analysis.utils.visual_logging

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.structures module
====================================
.. automodule:: cv_analysis.utils.structures
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.test\_metrics module
=======================================
.. automodule:: cv_analysis.utils.test_metrics
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.utils module
===============================
.. automodule:: cv_analysis.utils.utils
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
cv\_analysis.utils.visual\_logging module
=========================================
.. automodule:: cv_analysis.utils.visual_logging
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,7 +0,0 @@
serve module
============
.. automodule:: serve
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,925 +0,0 @@
/*
* basic.css
* ~~~~~~~~~
*
* Sphinx stylesheet -- basic theme.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
/* -- main layout ----------------------------------------------------------- */
div.clearer {
clear: both;
}
div.section::after {
display: block;
content: '';
clear: left;
}
/* -- relbar ---------------------------------------------------------------- */
div.related {
width: 100%;
font-size: 90%;
}
div.related h3 {
display: none;
}
div.related ul {
margin: 0;
padding: 0 0 0 10px;
list-style: none;
}
div.related li {
display: inline;
}
div.related li.right {
float: right;
margin-right: 5px;
}
/* -- sidebar --------------------------------------------------------------- */
div.sphinxsidebarwrapper {
padding: 10px 5px 0 10px;
}
div.sphinxsidebar {
float: left;
width: 270px;
margin-left: -100%;
font-size: 90%;
word-wrap: break-word;
overflow-wrap : break-word;
}
div.sphinxsidebar ul {
list-style: none;
}
div.sphinxsidebar ul ul,
div.sphinxsidebar ul.want-points {
margin-left: 20px;
list-style: square;
}
div.sphinxsidebar ul ul {
margin-top: 0;
margin-bottom: 0;
}
div.sphinxsidebar form {
margin-top: 10px;
}
div.sphinxsidebar input {
border: 1px solid #98dbcc;
font-family: sans-serif;
font-size: 1em;
}
div.sphinxsidebar #searchbox form.search {
overflow: hidden;
}
div.sphinxsidebar #searchbox input[type="text"] {
float: left;
width: 80%;
padding: 0.25em;
box-sizing: border-box;
}
div.sphinxsidebar #searchbox input[type="submit"] {
float: left;
width: 20%;
border-left: none;
padding: 0.25em;
box-sizing: border-box;
}
img {
border: 0;
max-width: 100%;
}
/* -- search page ----------------------------------------------------------- */
ul.search {
margin: 10px 0 0 20px;
padding: 0;
}
ul.search li {
padding: 5px 0 5px 20px;
background-image: url(file.png);
background-repeat: no-repeat;
background-position: 0 7px;
}
ul.search li a {
font-weight: bold;
}
ul.search li p.context {
color: #888;
margin: 2px 0 0 30px;
text-align: left;
}
ul.keywordmatches li.goodmatch a {
font-weight: bold;
}
/* -- index page ------------------------------------------------------------ */
table.contentstable {
width: 90%;
margin-left: auto;
margin-right: auto;
}
table.contentstable p.biglink {
line-height: 150%;
}
a.biglink {
font-size: 1.3em;
}
span.linkdescr {
font-style: italic;
padding-top: 5px;
font-size: 90%;
}
/* -- general index --------------------------------------------------------- */
table.indextable {
width: 100%;
}
table.indextable td {
text-align: left;
vertical-align: top;
}
table.indextable ul {
margin-top: 0;
margin-bottom: 0;
list-style-type: none;
}
table.indextable > tbody > tr > td > ul {
padding-left: 0em;
}
table.indextable tr.pcap {
height: 10px;
}
table.indextable tr.cap {
margin-top: 10px;
background-color: #f2f2f2;
}
img.toggler {
margin-right: 3px;
margin-top: 3px;
cursor: pointer;
}
div.modindex-jumpbox {
border-top: 1px solid #ddd;
border-bottom: 1px solid #ddd;
margin: 1em 0 1em 0;
padding: 0.4em;
}
div.genindex-jumpbox {
border-top: 1px solid #ddd;
border-bottom: 1px solid #ddd;
margin: 1em 0 1em 0;
padding: 0.4em;
}
/* -- domain module index --------------------------------------------------- */
table.modindextable td {
padding: 2px;
border-collapse: collapse;
}
/* -- general body styles --------------------------------------------------- */
div.body {
min-width: 360px;
max-width: 800px;
}
div.body p, div.body dd, div.body li, div.body blockquote {
-moz-hyphens: auto;
-ms-hyphens: auto;
-webkit-hyphens: auto;
hyphens: auto;
}
a.headerlink {
visibility: hidden;
}
a:visited {
color: #551A8B;
}
h1:hover > a.headerlink,
h2:hover > a.headerlink,
h3:hover > a.headerlink,
h4:hover > a.headerlink,
h5:hover > a.headerlink,
h6:hover > a.headerlink,
dt:hover > a.headerlink,
caption:hover > a.headerlink,
p.caption:hover > a.headerlink,
div.code-block-caption:hover > a.headerlink {
visibility: visible;
}
div.body p.caption {
text-align: inherit;
}
div.body td {
text-align: left;
}
.first {
margin-top: 0 !important;
}
p.rubric {
margin-top: 30px;
font-weight: bold;
}
img.align-left, figure.align-left, .figure.align-left, object.align-left {
clear: left;
float: left;
margin-right: 1em;
}
img.align-right, figure.align-right, .figure.align-right, object.align-right {
clear: right;
float: right;
margin-left: 1em;
}
img.align-center, figure.align-center, .figure.align-center, object.align-center {
display: block;
margin-left: auto;
margin-right: auto;
}
img.align-default, figure.align-default, .figure.align-default {
display: block;
margin-left: auto;
margin-right: auto;
}
.align-left {
text-align: left;
}
.align-center {
text-align: center;
}
.align-default {
text-align: center;
}
.align-right {
text-align: right;
}
/* -- sidebars -------------------------------------------------------------- */
div.sidebar,
aside.sidebar {
margin: 0 0 0.5em 1em;
border: 1px solid #ddb;
padding: 7px;
background-color: #ffe;
width: 40%;
float: right;
clear: right;
overflow-x: auto;
}
p.sidebar-title {
font-weight: bold;
}
nav.contents,
aside.topic,
div.admonition, div.topic, blockquote {
clear: left;
}
/* -- topics ---------------------------------------------------------------- */
nav.contents,
aside.topic,
div.topic {
border: 1px solid #ccc;
padding: 7px;
margin: 10px 0 10px 0;
}
p.topic-title {
font-size: 1.1em;
font-weight: bold;
margin-top: 10px;
}
/* -- admonitions ----------------------------------------------------------- */
div.admonition {
margin-top: 10px;
margin-bottom: 10px;
padding: 7px;
}
div.admonition dt {
font-weight: bold;
}
p.admonition-title {
margin: 0px 10px 5px 0px;
font-weight: bold;
}
div.body p.centered {
text-align: center;
margin-top: 25px;
}
/* -- content of sidebars/topics/admonitions -------------------------------- */
div.sidebar > :last-child,
aside.sidebar > :last-child,
nav.contents > :last-child,
aside.topic > :last-child,
div.topic > :last-child,
div.admonition > :last-child {
margin-bottom: 0;
}
div.sidebar::after,
aside.sidebar::after,
nav.contents::after,
aside.topic::after,
div.topic::after,
div.admonition::after,
blockquote::after {
display: block;
content: '';
clear: both;
}
/* -- tables ---------------------------------------------------------------- */
table.docutils {
margin-top: 10px;
margin-bottom: 10px;
border: 0;
border-collapse: collapse;
}
table.align-center {
margin-left: auto;
margin-right: auto;
}
table.align-default {
margin-left: auto;
margin-right: auto;
}
table caption span.caption-number {
font-style: italic;
}
table caption span.caption-text {
}
table.docutils td, table.docutils th {
padding: 1px 8px 1px 5px;
border-top: 0;
border-left: 0;
border-right: 0;
border-bottom: 1px solid #aaa;
}
th {
text-align: left;
padding-right: 5px;
}
table.citation {
border-left: solid 1px gray;
margin-left: 1px;
}
table.citation td {
border-bottom: none;
}
th > :first-child,
td > :first-child {
margin-top: 0px;
}
th > :last-child,
td > :last-child {
margin-bottom: 0px;
}
/* -- figures --------------------------------------------------------------- */
div.figure, figure {
margin: 0.5em;
padding: 0.5em;
}
div.figure p.caption, figcaption {
padding: 0.3em;
}
div.figure p.caption span.caption-number,
figcaption span.caption-number {
font-style: italic;
}
div.figure p.caption span.caption-text,
figcaption span.caption-text {
}
/* -- field list styles ----------------------------------------------------- */
table.field-list td, table.field-list th {
border: 0 !important;
}
.field-list ul {
margin: 0;
padding-left: 1em;
}
.field-list p {
margin: 0;
}
.field-name {
-moz-hyphens: manual;
-ms-hyphens: manual;
-webkit-hyphens: manual;
hyphens: manual;
}
/* -- hlist styles ---------------------------------------------------------- */
table.hlist {
margin: 1em 0;
}
table.hlist td {
vertical-align: top;
}
/* -- object description styles --------------------------------------------- */
.sig {
font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
}
.sig-name, code.descname {
background-color: transparent;
font-weight: bold;
}
.sig-name {
font-size: 1.1em;
}
code.descname {
font-size: 1.2em;
}
.sig-prename, code.descclassname {
background-color: transparent;
}
.optional {
font-size: 1.3em;
}
.sig-paren {
font-size: larger;
}
.sig-param.n {
font-style: italic;
}
/* C++ specific styling */
.sig-inline.c-texpr,
.sig-inline.cpp-texpr {
font-family: unset;
}
.sig.c .k, .sig.c .kt,
.sig.cpp .k, .sig.cpp .kt {
color: #0033B3;
}
.sig.c .m,
.sig.cpp .m {
color: #1750EB;
}
.sig.c .s, .sig.c .sc,
.sig.cpp .s, .sig.cpp .sc {
color: #067D17;
}
/* -- other body styles ----------------------------------------------------- */
ol.arabic {
list-style: decimal;
}
ol.loweralpha {
list-style: lower-alpha;
}
ol.upperalpha {
list-style: upper-alpha;
}
ol.lowerroman {
list-style: lower-roman;
}
ol.upperroman {
list-style: upper-roman;
}
:not(li) > ol > li:first-child > :first-child,
:not(li) > ul > li:first-child > :first-child {
margin-top: 0px;
}
:not(li) > ol > li:last-child > :last-child,
:not(li) > ul > li:last-child > :last-child {
margin-bottom: 0px;
}
ol.simple ol p,
ol.simple ul p,
ul.simple ol p,
ul.simple ul p {
margin-top: 0;
}
ol.simple > li:not(:first-child) > p,
ul.simple > li:not(:first-child) > p {
margin-top: 0;
}
ol.simple p,
ul.simple p {
margin-bottom: 0;
}
aside.footnote > span,
div.citation > span {
float: left;
}
aside.footnote > span:last-of-type,
div.citation > span:last-of-type {
padding-right: 0.5em;
}
aside.footnote > p {
margin-left: 2em;
}
div.citation > p {
margin-left: 4em;
}
aside.footnote > p:last-of-type,
div.citation > p:last-of-type {
margin-bottom: 0em;
}
aside.footnote > p:last-of-type:after,
div.citation > p:last-of-type:after {
content: "";
clear: both;
}
dl.field-list {
display: grid;
grid-template-columns: fit-content(30%) auto;
}
dl.field-list > dt {
font-weight: bold;
word-break: break-word;
padding-left: 0.5em;
padding-right: 5px;
}
dl.field-list > dd {
padding-left: 0.5em;
margin-top: 0em;
margin-left: 0em;
margin-bottom: 0em;
}
dl {
margin-bottom: 15px;
}
dd > :first-child {
margin-top: 0px;
}
dd ul, dd table {
margin-bottom: 10px;
}
dd {
margin-top: 3px;
margin-bottom: 10px;
margin-left: 30px;
}
.sig dd {
margin-top: 0px;
margin-bottom: 0px;
}
.sig dl {
margin-top: 0px;
margin-bottom: 0px;
}
dl > dd:last-child,
dl > dd:last-child > :last-child {
margin-bottom: 0;
}
dt:target, span.highlighted {
background-color: #fbe54e;
}
rect.highlighted {
fill: #fbe54e;
}
dl.glossary dt {
font-weight: bold;
font-size: 1.1em;
}
.versionmodified {
font-style: italic;
}
.system-message {
background-color: #fda;
padding: 5px;
border: 3px solid red;
}
.footnote:target {
background-color: #ffa;
}
.line-block {
display: block;
margin-top: 1em;
margin-bottom: 1em;
}
.line-block .line-block {
margin-top: 0;
margin-bottom: 0;
margin-left: 1.5em;
}
.guilabel, .menuselection {
font-family: sans-serif;
}
.accelerator {
text-decoration: underline;
}
.classifier {
font-style: oblique;
}
.classifier:before {
font-style: normal;
margin: 0 0.5em;
content: ":";
display: inline-block;
}
abbr, acronym {
border-bottom: dotted 1px;
cursor: help;
}
.translated {
background-color: rgba(207, 255, 207, 0.2)
}
.untranslated {
background-color: rgba(255, 207, 207, 0.2)
}
/* -- code displays --------------------------------------------------------- */
pre {
overflow: auto;
overflow-y: hidden; /* fixes display issues on Chrome browsers */
}
pre, div[class*="highlight-"] {
clear: both;
}
span.pre {
-moz-hyphens: none;
-ms-hyphens: none;
-webkit-hyphens: none;
hyphens: none;
white-space: nowrap;
}
div[class*="highlight-"] {
margin: 1em 0;
}
td.linenos pre {
border: 0;
background-color: transparent;
color: #aaa;
}
table.highlighttable {
display: block;
}
table.highlighttable tbody {
display: block;
}
table.highlighttable tr {
display: flex;
}
table.highlighttable td {
margin: 0;
padding: 0;
}
table.highlighttable td.linenos {
padding-right: 0.5em;
}
table.highlighttable td.code {
flex: 1;
overflow: hidden;
}
.highlight .hll {
display: block;
}
div.highlight pre,
table.highlighttable pre {
margin: 0;
}
div.code-block-caption + div {
margin-top: 0;
}
div.code-block-caption {
margin-top: 1em;
padding: 2px 5px;
font-size: small;
}
div.code-block-caption code {
background-color: transparent;
}
table.highlighttable td.linenos,
span.linenos,
div.highlight span.gp { /* gp: Generic.Prompt */
user-select: none;
-webkit-user-select: text; /* Safari fallback only */
-webkit-user-select: none; /* Chrome/Safari */
-moz-user-select: none; /* Firefox */
-ms-user-select: none; /* IE10+ */
}
div.code-block-caption span.caption-number {
padding: 0.1em 0.3em;
font-style: italic;
}
div.code-block-caption span.caption-text {
}
div.literal-block-wrapper {
margin: 1em 0;
}
code.xref, a code {
background-color: transparent;
font-weight: bold;
}
h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
background-color: transparent;
}
.viewcode-link {
float: right;
}
.viewcode-back {
float: right;
font-family: sans-serif;
}
div.viewcode-block:target {
margin: -1px -10px;
padding: 0 10px;
}
/* -- math display ---------------------------------------------------------- */
img.math {
vertical-align: middle;
}
div.body div.math p {
text-align: center;
}
span.eqno {
float: right;
}
span.eqno a.headerlink {
position: absolute;
z-index: 1;
}
div.math:hover a.headerlink {
visibility: visible;
}
/* -- printout stylesheet --------------------------------------------------- */
@media print {
div.document,
div.documentwrapper,
div.bodywrapper {
margin: 0 !important;
width: 100%;
}
div.sphinxsidebar,
div.related,
div.footer,
#top-link {
display: none;
}
}

View File

@ -1,156 +0,0 @@
/*
* doctools.js
* ~~~~~~~~~~~
*
* Base JavaScript utilities for all Sphinx HTML documentation.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
"use strict";
const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([
"TEXTAREA",
"INPUT",
"SELECT",
"BUTTON",
]);
const _ready = (callback) => {
if (document.readyState !== "loading") {
callback();
} else {
document.addEventListener("DOMContentLoaded", callback);
}
};
/**
* Small JavaScript module for the documentation.
*/
const Documentation = {
init: () => {
Documentation.initDomainIndexTable();
Documentation.initOnKeyListeners();
},
/**
* i18n support
*/
TRANSLATIONS: {},
PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
LOCALE: "unknown",
// gettext and ngettext don't access this so that the functions
// can safely bound to a different name (_ = Documentation.gettext)
gettext: (string) => {
const translated = Documentation.TRANSLATIONS[string];
switch (typeof translated) {
case "undefined":
return string; // no translation
case "string":
return translated; // translation exists
default:
return translated[0]; // (singular, plural) translation tuple exists
}
},
ngettext: (singular, plural, n) => {
const translated = Documentation.TRANSLATIONS[singular];
if (typeof translated !== "undefined")
return translated[Documentation.PLURAL_EXPR(n)];
return n === 1 ? singular : plural;
},
addTranslations: (catalog) => {
Object.assign(Documentation.TRANSLATIONS, catalog.messages);
Documentation.PLURAL_EXPR = new Function(
"n",
`return (${catalog.plural_expr})`
);
Documentation.LOCALE = catalog.locale;
},
/**
* helper function to focus on search bar
*/
focusSearchBar: () => {
document.querySelectorAll("input[name=q]")[0]?.focus();
},
/**
* Initialise the domain index toggle buttons
*/
initDomainIndexTable: () => {
const toggler = (el) => {
const idNumber = el.id.substr(7);
const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
if (el.src.substr(-9) === "minus.png") {
el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`;
toggledRows.forEach((el) => (el.style.display = "none"));
} else {
el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`;
toggledRows.forEach((el) => (el.style.display = ""));
}
};
const togglerElements = document.querySelectorAll("img.toggler");
togglerElements.forEach((el) =>
el.addEventListener("click", (event) => toggler(event.currentTarget))
);
togglerElements.forEach((el) => (el.style.display = ""));
if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler);
},
initOnKeyListeners: () => {
// only install a listener if it is really needed
if (
!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
)
return;
document.addEventListener("keydown", (event) => {
// bail for input elements
if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
// bail with special keys
if (event.altKey || event.ctrlKey || event.metaKey) return;
if (!event.shiftKey) {
switch (event.key) {
case "ArrowLeft":
if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
const prevLink = document.querySelector('link[rel="prev"]');
if (prevLink && prevLink.href) {
window.location.href = prevLink.href;
event.preventDefault();
}
break;
case "ArrowRight":
if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
const nextLink = document.querySelector('link[rel="next"]');
if (nextLink && nextLink.href) {
window.location.href = nextLink.href;
event.preventDefault();
}
break;
}
}
// some keyboard layouts may need Shift to get /
switch (event.key) {
case "/":
if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
Documentation.focusSearchBar();
event.preventDefault();
}
});
},
};
// quick alias for translations
const _ = Documentation.gettext;
_ready(Documentation.init);

View File

@ -1,13 +0,0 @@
const DOCUMENTATION_OPTIONS = {
VERSION: '2.5.2',
LANGUAGE: 'en',
COLLAPSE_INDEX: false,
BUILDER: 'html',
FILE_SUFFIX: '.html',
LINK_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: '.txt',
NAVIGATION_WITH_KEYS: false,
SHOW_SEARCH_SUMMARY: true,
ENABLE_SEARCH_SHORTCUTS: true,
};

Binary file not shown.

Before

Width:  |  Height:  |  Size: 286 B

View File

@ -1,199 +0,0 @@
/*
* language_data.js
* ~~~~~~~~~~~~~~~~
*
* This script contains the language-specific data used by searchtools.js,
* namely the list of stopwords, stemmer, scorer and splitter.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
/* Non-minified version is copied as a separate JS file, if available */
/**
* Porter Stemmer
*/
var Stemmer = function() {
var step2list = {
ational: 'ate',
tional: 'tion',
enci: 'ence',
anci: 'ance',
izer: 'ize',
bli: 'ble',
alli: 'al',
entli: 'ent',
eli: 'e',
ousli: 'ous',
ization: 'ize',
ation: 'ate',
ator: 'ate',
alism: 'al',
iveness: 'ive',
fulness: 'ful',
ousness: 'ous',
aliti: 'al',
iviti: 'ive',
biliti: 'ble',
logi: 'log'
};
var step3list = {
icate: 'ic',
ative: '',
alize: 'al',
iciti: 'ic',
ical: 'ic',
ful: '',
ness: ''
};
var c = "[^aeiou]"; // consonant
var v = "[aeiouy]"; // vowel
var C = c + "[^aeiouy]*"; // consonant sequence
var V = v + "[aeiou]*"; // vowel sequence
var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
var s_v = "^(" + C + ")?" + v; // vowel in stem
this.stemWord = function (w) {
var stem;
var suffix;
var firstch;
var origword = w;
if (w.length < 3)
return w;
var re;
var re2;
var re3;
var re4;
firstch = w.substr(0,1);
if (firstch == "y")
w = firstch.toUpperCase() + w.substr(1);
// Step 1a
re = /^(.+?)(ss|i)es$/;
re2 = /^(.+?)([^s])s$/;
if (re.test(w))
w = w.replace(re,"$1$2");
else if (re2.test(w))
w = w.replace(re2,"$1$2");
// Step 1b
re = /^(.+?)eed$/;
re2 = /^(.+?)(ed|ing)$/;
if (re.test(w)) {
var fp = re.exec(w);
re = new RegExp(mgr0);
if (re.test(fp[1])) {
re = /.$/;
w = w.replace(re,"");
}
}
else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1];
re2 = new RegExp(s_v);
if (re2.test(stem)) {
w = stem;
re2 = /(at|bl|iz)$/;
re3 = new RegExp("([^aeiouylsz])\\1$");
re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re2.test(w))
w = w + "e";
else if (re3.test(w)) {
re = /.$/;
w = w.replace(re,"");
}
else if (re4.test(w))
w = w + "e";
}
}
// Step 1c
re = /^(.+?)y$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(s_v);
if (re.test(stem))
w = stem + "i";
}
// Step 2
re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step2list[suffix];
}
// Step 3
re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step3list[suffix];
}
// Step 4
re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
re2 = /^(.+?)(s|t)(ion)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
if (re.test(stem))
w = stem;
}
else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1] + fp[2];
re2 = new RegExp(mgr1);
if (re2.test(stem))
w = stem;
}
// Step 5
re = /^(.+?)e$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
re2 = new RegExp(meq1);
re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
w = stem;
}
re = /ll$/;
re2 = new RegExp(mgr1);
if (re.test(w) && re2.test(w)) {
re = /.$/;
w = w.replace(re,"");
}
// and turn initial Y back to y
if (firstch == "y")
w = firstch.toLowerCase() + w.substr(1);
return w;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 B

View File

@ -1,152 +0,0 @@
html[data-theme="light"] .highlight pre { line-height: 125%; }
html[data-theme="light"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
html[data-theme="light"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
html[data-theme="light"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
html[data-theme="light"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
html[data-theme="light"] .highlight .hll { background-color: #7971292e }
html[data-theme="light"] .highlight { background: #fefefe; color: #545454 }
html[data-theme="light"] .highlight .c { color: #797129 } /* Comment */
html[data-theme="light"] .highlight .err { color: #d91e18 } /* Error */
html[data-theme="light"] .highlight .k { color: #7928a1 } /* Keyword */
html[data-theme="light"] .highlight .l { color: #797129 } /* Literal */
html[data-theme="light"] .highlight .n { color: #545454 } /* Name */
html[data-theme="light"] .highlight .o { color: #008000 } /* Operator */
html[data-theme="light"] .highlight .p { color: #545454 } /* Punctuation */
html[data-theme="light"] .highlight .ch { color: #797129 } /* Comment.Hashbang */
html[data-theme="light"] .highlight .cm { color: #797129 } /* Comment.Multiline */
html[data-theme="light"] .highlight .cp { color: #797129 } /* Comment.Preproc */
html[data-theme="light"] .highlight .cpf { color: #797129 } /* Comment.PreprocFile */
html[data-theme="light"] .highlight .c1 { color: #797129 } /* Comment.Single */
html[data-theme="light"] .highlight .cs { color: #797129 } /* Comment.Special */
html[data-theme="light"] .highlight .gd { color: #007faa } /* Generic.Deleted */
html[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */
html[data-theme="light"] .highlight .gh { color: #007faa } /* Generic.Heading */
html[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */
html[data-theme="light"] .highlight .gu { color: #007faa } /* Generic.Subheading */
html[data-theme="light"] .highlight .kc { color: #7928a1 } /* Keyword.Constant */
html[data-theme="light"] .highlight .kd { color: #7928a1 } /* Keyword.Declaration */
html[data-theme="light"] .highlight .kn { color: #7928a1 } /* Keyword.Namespace */
html[data-theme="light"] .highlight .kp { color: #7928a1 } /* Keyword.Pseudo */
html[data-theme="light"] .highlight .kr { color: #7928a1 } /* Keyword.Reserved */
html[data-theme="light"] .highlight .kt { color: #797129 } /* Keyword.Type */
html[data-theme="light"] .highlight .ld { color: #797129 } /* Literal.Date */
html[data-theme="light"] .highlight .m { color: #797129 } /* Literal.Number */
html[data-theme="light"] .highlight .s { color: #008000 } /* Literal.String */
html[data-theme="light"] .highlight .na { color: #797129 } /* Name.Attribute */
html[data-theme="light"] .highlight .nb { color: #797129 } /* Name.Builtin */
html[data-theme="light"] .highlight .nc { color: #007faa } /* Name.Class */
html[data-theme="light"] .highlight .no { color: #007faa } /* Name.Constant */
html[data-theme="light"] .highlight .nd { color: #797129 } /* Name.Decorator */
html[data-theme="light"] .highlight .ni { color: #008000 } /* Name.Entity */
html[data-theme="light"] .highlight .ne { color: #7928a1 } /* Name.Exception */
html[data-theme="light"] .highlight .nf { color: #007faa } /* Name.Function */
html[data-theme="light"] .highlight .nl { color: #797129 } /* Name.Label */
html[data-theme="light"] .highlight .nn { color: #545454 } /* Name.Namespace */
html[data-theme="light"] .highlight .nx { color: #545454 } /* Name.Other */
html[data-theme="light"] .highlight .py { color: #007faa } /* Name.Property */
html[data-theme="light"] .highlight .nt { color: #007faa } /* Name.Tag */
html[data-theme="light"] .highlight .nv { color: #d91e18 } /* Name.Variable */
html[data-theme="light"] .highlight .ow { color: #7928a1 } /* Operator.Word */
html[data-theme="light"] .highlight .pm { color: #545454 } /* Punctuation.Marker */
html[data-theme="light"] .highlight .w { color: #545454 } /* Text.Whitespace */
html[data-theme="light"] .highlight .mb { color: #797129 } /* Literal.Number.Bin */
html[data-theme="light"] .highlight .mf { color: #797129 } /* Literal.Number.Float */
html[data-theme="light"] .highlight .mh { color: #797129 } /* Literal.Number.Hex */
html[data-theme="light"] .highlight .mi { color: #797129 } /* Literal.Number.Integer */
html[data-theme="light"] .highlight .mo { color: #797129 } /* Literal.Number.Oct */
html[data-theme="light"] .highlight .sa { color: #008000 } /* Literal.String.Affix */
html[data-theme="light"] .highlight .sb { color: #008000 } /* Literal.String.Backtick */
html[data-theme="light"] .highlight .sc { color: #008000 } /* Literal.String.Char */
html[data-theme="light"] .highlight .dl { color: #008000 } /* Literal.String.Delimiter */
html[data-theme="light"] .highlight .sd { color: #008000 } /* Literal.String.Doc */
html[data-theme="light"] .highlight .s2 { color: #008000 } /* Literal.String.Double */
html[data-theme="light"] .highlight .se { color: #008000 } /* Literal.String.Escape */
html[data-theme="light"] .highlight .sh { color: #008000 } /* Literal.String.Heredoc */
html[data-theme="light"] .highlight .si { color: #008000 } /* Literal.String.Interpol */
html[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */
html[data-theme="light"] .highlight .sr { color: #d91e18 } /* Literal.String.Regex */
html[data-theme="light"] .highlight .s1 { color: #008000 } /* Literal.String.Single */
html[data-theme="light"] .highlight .ss { color: #007faa } /* Literal.String.Symbol */
html[data-theme="light"] .highlight .bp { color: #797129 } /* Name.Builtin.Pseudo */
html[data-theme="light"] .highlight .fm { color: #007faa } /* Name.Function.Magic */
html[data-theme="light"] .highlight .vc { color: #d91e18 } /* Name.Variable.Class */
html[data-theme="light"] .highlight .vg { color: #d91e18 } /* Name.Variable.Global */
html[data-theme="light"] .highlight .vi { color: #d91e18 } /* Name.Variable.Instance */
html[data-theme="light"] .highlight .vm { color: #797129 } /* Name.Variable.Magic */
html[data-theme="light"] .highlight .il { color: #797129 } /* Literal.Number.Integer.Long */
html[data-theme="dark"] .highlight pre { line-height: 125%; }
html[data-theme="dark"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
html[data-theme="dark"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
html[data-theme="dark"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
html[data-theme="dark"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
html[data-theme="dark"] .highlight .hll { background-color: #ffd9002e }
html[data-theme="dark"] .highlight { background: #2b2b2b; color: #f8f8f2 }
html[data-theme="dark"] .highlight .c { color: #ffd900 } /* Comment */
html[data-theme="dark"] .highlight .err { color: #ffa07a } /* Error */
html[data-theme="dark"] .highlight .k { color: #dcc6e0 } /* Keyword */
html[data-theme="dark"] .highlight .l { color: #ffd900 } /* Literal */
html[data-theme="dark"] .highlight .n { color: #f8f8f2 } /* Name */
html[data-theme="dark"] .highlight .o { color: #abe338 } /* Operator */
html[data-theme="dark"] .highlight .p { color: #f8f8f2 } /* Punctuation */
html[data-theme="dark"] .highlight .ch { color: #ffd900 } /* Comment.Hashbang */
html[data-theme="dark"] .highlight .cm { color: #ffd900 } /* Comment.Multiline */
html[data-theme="dark"] .highlight .cp { color: #ffd900 } /* Comment.Preproc */
html[data-theme="dark"] .highlight .cpf { color: #ffd900 } /* Comment.PreprocFile */
html[data-theme="dark"] .highlight .c1 { color: #ffd900 } /* Comment.Single */
html[data-theme="dark"] .highlight .cs { color: #ffd900 } /* Comment.Special */
html[data-theme="dark"] .highlight .gd { color: #00e0e0 } /* Generic.Deleted */
html[data-theme="dark"] .highlight .ge { font-style: italic } /* Generic.Emph */
html[data-theme="dark"] .highlight .gh { color: #00e0e0 } /* Generic.Heading */
html[data-theme="dark"] .highlight .gs { font-weight: bold } /* Generic.Strong */
html[data-theme="dark"] .highlight .gu { color: #00e0e0 } /* Generic.Subheading */
html[data-theme="dark"] .highlight .kc { color: #dcc6e0 } /* Keyword.Constant */
html[data-theme="dark"] .highlight .kd { color: #dcc6e0 } /* Keyword.Declaration */
html[data-theme="dark"] .highlight .kn { color: #dcc6e0 } /* Keyword.Namespace */
html[data-theme="dark"] .highlight .kp { color: #dcc6e0 } /* Keyword.Pseudo */
html[data-theme="dark"] .highlight .kr { color: #dcc6e0 } /* Keyword.Reserved */
html[data-theme="dark"] .highlight .kt { color: #ffd900 } /* Keyword.Type */
html[data-theme="dark"] .highlight .ld { color: #ffd900 } /* Literal.Date */
html[data-theme="dark"] .highlight .m { color: #ffd900 } /* Literal.Number */
html[data-theme="dark"] .highlight .s { color: #abe338 } /* Literal.String */
html[data-theme="dark"] .highlight .na { color: #ffd900 } /* Name.Attribute */
html[data-theme="dark"] .highlight .nb { color: #ffd900 } /* Name.Builtin */
html[data-theme="dark"] .highlight .nc { color: #00e0e0 } /* Name.Class */
html[data-theme="dark"] .highlight .no { color: #00e0e0 } /* Name.Constant */
html[data-theme="dark"] .highlight .nd { color: #ffd900 } /* Name.Decorator */
html[data-theme="dark"] .highlight .ni { color: #abe338 } /* Name.Entity */
html[data-theme="dark"] .highlight .ne { color: #dcc6e0 } /* Name.Exception */
html[data-theme="dark"] .highlight .nf { color: #00e0e0 } /* Name.Function */
html[data-theme="dark"] .highlight .nl { color: #ffd900 } /* Name.Label */
html[data-theme="dark"] .highlight .nn { color: #f8f8f2 } /* Name.Namespace */
html[data-theme="dark"] .highlight .nx { color: #f8f8f2 } /* Name.Other */
html[data-theme="dark"] .highlight .py { color: #00e0e0 } /* Name.Property */
html[data-theme="dark"] .highlight .nt { color: #00e0e0 } /* Name.Tag */
html[data-theme="dark"] .highlight .nv { color: #ffa07a } /* Name.Variable */
html[data-theme="dark"] .highlight .ow { color: #dcc6e0 } /* Operator.Word */
html[data-theme="dark"] .highlight .pm { color: #f8f8f2 } /* Punctuation.Marker */
html[data-theme="dark"] .highlight .w { color: #f8f8f2 } /* Text.Whitespace */
html[data-theme="dark"] .highlight .mb { color: #ffd900 } /* Literal.Number.Bin */
html[data-theme="dark"] .highlight .mf { color: #ffd900 } /* Literal.Number.Float */
html[data-theme="dark"] .highlight .mh { color: #ffd900 } /* Literal.Number.Hex */
html[data-theme="dark"] .highlight .mi { color: #ffd900 } /* Literal.Number.Integer */
html[data-theme="dark"] .highlight .mo { color: #ffd900 } /* Literal.Number.Oct */
html[data-theme="dark"] .highlight .sa { color: #abe338 } /* Literal.String.Affix */
html[data-theme="dark"] .highlight .sb { color: #abe338 } /* Literal.String.Backtick */
html[data-theme="dark"] .highlight .sc { color: #abe338 } /* Literal.String.Char */
html[data-theme="dark"] .highlight .dl { color: #abe338 } /* Literal.String.Delimiter */
html[data-theme="dark"] .highlight .sd { color: #abe338 } /* Literal.String.Doc */
html[data-theme="dark"] .highlight .s2 { color: #abe338 } /* Literal.String.Double */
html[data-theme="dark"] .highlight .se { color: #abe338 } /* Literal.String.Escape */
html[data-theme="dark"] .highlight .sh { color: #abe338 } /* Literal.String.Heredoc */
html[data-theme="dark"] .highlight .si { color: #abe338 } /* Literal.String.Interpol */
html[data-theme="dark"] .highlight .sx { color: #abe338 } /* Literal.String.Other */
html[data-theme="dark"] .highlight .sr { color: #ffa07a } /* Literal.String.Regex */
html[data-theme="dark"] .highlight .s1 { color: #abe338 } /* Literal.String.Single */
html[data-theme="dark"] .highlight .ss { color: #00e0e0 } /* Literal.String.Symbol */
html[data-theme="dark"] .highlight .bp { color: #ffd900 } /* Name.Builtin.Pseudo */
html[data-theme="dark"] .highlight .fm { color: #00e0e0 } /* Name.Function.Magic */
html[data-theme="dark"] .highlight .vc { color: #ffa07a } /* Name.Variable.Class */
html[data-theme="dark"] .highlight .vg { color: #ffa07a } /* Name.Variable.Global */
html[data-theme="dark"] .highlight .vi { color: #ffa07a } /* Name.Variable.Instance */
html[data-theme="dark"] .highlight .vm { color: #ffd900 } /* Name.Variable.Magic */
html[data-theme="dark"] .highlight .il { color: #ffd900 } /* Literal.Number.Integer.Long */

File diff suppressed because one or more lines are too long

View File

@ -1,5 +0,0 @@
/*!
* Bootstrap v5.3.2 (https://getbootstrap.com/)
* Copyright 2011-2023 The Bootstrap Authors (https://github.com/twbs/bootstrap/graphs/contributors)
* Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)
*/

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More