Compare commits
166 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
799fe331c3 | ||
|
|
dfbfc50556 | ||
|
|
63fbd387a3 | ||
|
|
41dbfc69d9 | ||
|
|
b73e9b2ed9 | ||
|
|
92692281ce | ||
|
|
cb0c58d699 | ||
|
|
eb96403fe2 | ||
|
|
c8daf888c6 | ||
|
|
eb921c365d | ||
|
|
7762f81a4a | ||
|
|
e991cfe1bf | ||
|
|
35c5ee5831 | ||
|
|
e97f34391a | ||
|
|
1fa10721aa | ||
|
|
7f0d0a48db | ||
|
|
333cd498b9 | ||
|
|
9df8c8f936 | ||
|
|
60adf0c381 | ||
|
|
537f605a85 | ||
|
|
66987ab8e9 | ||
|
|
43570142c3 | ||
|
|
d457f49001 | ||
|
|
536928c032 | ||
|
|
dc6183490f | ||
|
|
bbc2d0c8bf | ||
|
|
3462faf8c7 | ||
|
|
b136cc9ff3 | ||
|
|
cf431df1cb | ||
|
|
23406004ed | ||
|
|
b0467f2335 | ||
|
|
e86214f6b7 | ||
|
|
3b8d6eda04 | ||
|
|
3c9ddfcf0f | ||
|
|
b854312b08 | ||
|
|
0f45a25bc8 | ||
|
|
8762363aa9 | ||
|
|
72d26c4712 | ||
|
|
62fb637978 | ||
|
|
802372a504 | ||
|
|
ceb1c00784 | ||
|
|
f1f9e8d2bc | ||
|
|
8fcb6f29fb | ||
|
|
79926b9990 | ||
|
|
6d37622e95 | ||
|
|
6341512250 | ||
|
|
713697b32d | ||
|
|
b6e2540399 | ||
|
|
78b8f18865 | ||
|
|
55795b9e58 | ||
|
|
d2ec32b37c | ||
|
|
3202d95638 | ||
|
|
8c1e30c6df | ||
|
|
127fd7a399 | ||
|
|
560c73a5cb | ||
|
|
d821b93af9 | ||
|
|
2f20ec4ecd | ||
|
|
c2027df1c7 | ||
|
|
a966b49f89 | ||
|
|
8d81551da3 | ||
|
|
626da20afd | ||
|
|
a55b34379a | ||
|
|
2c5c3669a4 | ||
|
|
55b8e209d3 | ||
|
|
ab5096dd86 | ||
|
|
3a5fc32ec8 | ||
|
|
2c6232a1bf | ||
|
|
b43033e6bf | ||
|
|
5d13d8b3d0 | ||
|
|
f213a16cd0 | ||
|
|
9e04693ee1 | ||
|
|
fee357872f | ||
|
|
12bb7ee25f | ||
|
|
f7a0db2651 | ||
|
|
1d3b077ace | ||
|
|
102617fe2f | ||
|
|
0f0fe516d0 | ||
|
|
8de913840f | ||
|
|
aefb73bf28 | ||
|
|
20f8dcd336 | ||
|
|
681e59d24e | ||
|
|
abd350cc42 | ||
|
|
e264c948cf | ||
|
|
ddd680bb4c | ||
|
|
ebdf3cefbf | ||
|
|
ffb10876f5 | ||
|
|
95abb5d5fb | ||
|
|
482673f927 | ||
|
|
a52226d8fe | ||
|
|
fa959332cb | ||
|
|
688217f3cd | ||
|
|
183aad4bf8 | ||
|
|
0a11471191 | ||
|
|
55fb4e06f2 | ||
|
|
306c9b67cf | ||
|
|
60b1c15f82 | ||
|
|
940d7b9277 | ||
|
|
d1c2610bd5 | ||
|
|
50831036f5 | ||
|
|
726aae03a6 | ||
|
|
423842a4c9 | ||
|
|
6426c14fb7 | ||
|
|
6070736df9 | ||
|
|
295a5dea77 | ||
|
|
515cd2309b | ||
|
|
be65ea4ff5 | ||
|
|
85885f929b | ||
|
|
fc7d4ee829 | ||
|
|
83a922deed | ||
|
|
efcd661948 | ||
|
|
4c4ed8ba1e | ||
|
|
415d2b135b | ||
|
|
5538f12d3f | ||
|
|
a08799d7b8 | ||
|
|
db55d4ccf9 | ||
|
|
76940a28ba | ||
|
|
5331cb7c5b | ||
|
|
d44ed1c596 | ||
|
|
384d4b6f73 | ||
|
|
861c3e347e | ||
|
|
9c753fede3 | ||
|
|
fa93255ba1 | ||
|
|
f743bf6171 | ||
|
|
4ee343f6df | ||
|
|
335da13cb5 | ||
|
|
441814f201 | ||
|
|
f9a9a86bc7 | ||
|
|
d98f38607f | ||
|
|
63d2f891e4 | ||
|
|
cb974b19b6 | ||
|
|
019f0da11a | ||
|
|
9adc0e2ced | ||
|
|
11515f6f71 | ||
|
|
ee5f960a3f | ||
|
|
5b991d3a69 | ||
|
|
6033fec952 | ||
|
|
c64f02696d | ||
|
|
79163c33cf | ||
|
|
44dd613715 | ||
|
|
3654ab3c8d | ||
|
|
bb6ba8e0e9 | ||
|
|
6323884683 | ||
|
|
def2d2d108 | ||
|
|
cfbd2e287a | ||
|
|
436824c926 | ||
|
|
1a4ae6735d | ||
|
|
08c0096c07 | ||
|
|
233c6facfd | ||
|
|
4ce6c9bdc9 | ||
|
|
5bb9282da6 | ||
|
|
eef371e2a8 | ||
|
|
ad45e2c1da | ||
|
|
6909e48b60 | ||
|
|
35d046c454 | ||
|
|
f0bba5bbdb | ||
|
|
aee7ee9b97 | ||
|
|
64639bce6d | ||
|
|
16698428f3 | ||
|
|
04f100386d | ||
|
|
fcb2786697 | ||
|
|
7bbe459208 | ||
|
|
d52b2bfe22 | ||
|
|
506ed789f7 | ||
|
|
d14960da08 | ||
|
|
8c5a979cc8 | ||
|
|
555d1973b7 |
@ -10,7 +10,7 @@ omit =
|
|||||||
*/build_venv/*
|
*/build_venv/*
|
||||||
*/incl/*
|
*/incl/*
|
||||||
source =
|
source =
|
||||||
cv_analysis
|
cv_analysis
|
||||||
relative_files = True
|
relative_files = True
|
||||||
data_file = .coverage
|
data_file = .coverage
|
||||||
|
|
||||||
@ -46,4 +46,4 @@ ignore_errors = True
|
|||||||
directory = reports
|
directory = reports
|
||||||
|
|
||||||
[xml]
|
[xml]
|
||||||
output = reports/coverage.xml
|
output = reports/coverage.xml
|
||||||
|
|||||||
@ -97,4 +97,4 @@ target/
|
|||||||
*.swp
|
*.swp
|
||||||
*/*.swp
|
*/*.swp
|
||||||
*/*/*.swp
|
*/*/*.swp
|
||||||
*/*/*/*.swp
|
*/*/*/*.swp
|
||||||
|
|||||||
@ -1,7 +1,10 @@
|
|||||||
[core]
|
[core]
|
||||||
remote = vector
|
remote = azure_remote
|
||||||
autostage = true
|
|
||||||
['remote "vector"']
|
['remote "vector"']
|
||||||
url = ssh://vector.iqser.com/research/nonml_cv_doc_parsing/
|
url = ssh://vector.iqser.com/research/nonml_cv_doc_parsing/
|
||||||
port = 22
|
port = 22
|
||||||
|
['remote "azure_remote"']
|
||||||
|
url = azure://cv-sa-dvc/
|
||||||
|
connection_string = "DefaultEndpointsProtocol=https;AccountName=cvsacricket;AccountKey=KOuTAQ6Mp00ePTT5ObYmgaHlxwS1qukY4QU4Kuk7gy/vldneA+ZiKjaOpEFtqKA6Mtym2gQz8THy+ASts/Y1Bw==;EndpointSuffix=core.windows.net"
|
||||||
|
['remote "local"']
|
||||||
|
url = ../dvc_local_remote
|
||||||
|
|||||||
77
.gitignore
vendored
77
.gitignore
vendored
@ -1,27 +1,52 @@
|
|||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
.pytest*
|
||||||
|
.python-version
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# Project folders
|
||||||
|
scratch/
|
||||||
|
*.vscode/
|
||||||
|
.idea
|
||||||
|
*_app
|
||||||
|
*pytest_cache
|
||||||
|
*joblib
|
||||||
|
*tmp
|
||||||
|
*profiling
|
||||||
|
*logs
|
||||||
|
*docker
|
||||||
|
*drivers
|
||||||
|
*bamboo-specs/target
|
||||||
|
|
||||||
|
# Python specific files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.egg-info/
|
*.py[cod]
|
||||||
deskew_model/
|
*.ipynb
|
||||||
build_venv/
|
*.ipynb_checkpoints
|
||||||
/pdfs/
|
|
||||||
/results/
|
# file extensions
|
||||||
/pdfs/
|
*.log
|
||||||
/env/
|
*.csv
|
||||||
/.idea/
|
*.json
|
||||||
/.idea/.gitignore
|
*.pkl
|
||||||
/.idea/misc.xml
|
*.profile
|
||||||
/.idea/inspectionProfiles/profiles_settings.xml
|
*.cbm
|
||||||
/.idea/table_parsing.iml
|
|
||||||
/.idea/vcs.xml
|
# temp files
|
||||||
/results/
|
*.swp
|
||||||
/table_parsing.egg-info
|
*~
|
||||||
/target/
|
*.un~
|
||||||
/tests/
|
|
||||||
/cv_analysis.egg-info/dependency_links.txt
|
# keep files
|
||||||
/cv_analysis.egg-info/PKG-INFO
|
!notebooks/*.ipynb
|
||||||
/cv_analysis.egg-info/SOURCES.txt
|
|
||||||
/cv_analysis.egg-info/top_level.txt
|
# keep folders
|
||||||
/.vscode/
|
!secrets
|
||||||
/cv_analysis/test/test_data/example_pages.json
|
!data/*
|
||||||
/data/metadata_testing_files.csv
|
!drivers
|
||||||
.coverage
|
|
||||||
/data/
|
# unignore files
|
||||||
|
!bom.*
|
||||||
|
|||||||
30
.gitlab-ci.backup.yml
Normal file
30
.gitlab-ci.backup.yml
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
include:
|
||||||
|
- project: "Gitlab/gitlab"
|
||||||
|
ref: 0.3.0
|
||||||
|
file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"
|
||||||
|
|
||||||
|
variables:
|
||||||
|
NEXUS_PROJECT_DIR: red
|
||||||
|
IMAGENAME: "${CI_PROJECT_NAME}"
|
||||||
|
|
||||||
|
#################################
|
||||||
|
# temp. disable integration tests, b/c they don't cover the CV analysis case yet
|
||||||
|
trigger integration tests:
|
||||||
|
rules:
|
||||||
|
- when: never
|
||||||
|
|
||||||
|
release build:
|
||||||
|
stage: release
|
||||||
|
needs:
|
||||||
|
- job: set custom version
|
||||||
|
artifacts: true
|
||||||
|
optional: true
|
||||||
|
- job: calculate patch version
|
||||||
|
artifacts: true
|
||||||
|
optional: true
|
||||||
|
- job: calculate minor version
|
||||||
|
artifacts: true
|
||||||
|
optional: true
|
||||||
|
- job: build docker nexus
|
||||||
|
artifacts: true
|
||||||
|
#################################
|
||||||
35
.gitlab-ci.yml
Normal file
35
.gitlab-ci.yml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# CI for services, check gitlab repo for python package CI
|
||||||
|
include:
|
||||||
|
- project: "Gitlab/gitlab"
|
||||||
|
ref: main
|
||||||
|
file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
|
||||||
|
- project: "Gitlab/gitlab"
|
||||||
|
ref: main
|
||||||
|
file: "/ci-templates/research/docs.gitlab-ci.yml"
|
||||||
|
|
||||||
|
# set project variables here
|
||||||
|
variables:
|
||||||
|
NEXUS_PROJECT_DIR: red # subfolder in Nexus docker-gin where your container will be stored
|
||||||
|
IMAGENAME: $CI_PROJECT_NAME # if the project URL is gitlab.example.com/group-name/project-1, CI_PROJECT_NAME is project-1
|
||||||
|
|
||||||
|
pages:
|
||||||
|
only:
|
||||||
|
- master # KEEP THIS, necessary because `master` branch and not `main` branch
|
||||||
|
|
||||||
|
###################
|
||||||
|
# INTEGRATION TESTS
|
||||||
|
trigger-integration-tests:
|
||||||
|
extends: .integration-tests
|
||||||
|
# ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
|
||||||
|
# needs:
|
||||||
|
# - job: docker-build::model_name
|
||||||
|
# artifacts: true
|
||||||
|
rules:
|
||||||
|
- when: never
|
||||||
|
|
||||||
|
#########
|
||||||
|
# RELEASE
|
||||||
|
release:
|
||||||
|
extends: .release
|
||||||
|
needs:
|
||||||
|
- !reference [.needs-versioning, needs] # leave this line as is
|
||||||
61
.hooks/poetry_version_check.py
Normal file
61
.hooks/poetry_version_check.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import semver
|
||||||
|
from loguru import logger
|
||||||
|
from semver.version import Version
|
||||||
|
|
||||||
|
logger.remove()
|
||||||
|
logger.add(sys.stdout, level="INFO")
|
||||||
|
|
||||||
|
|
||||||
|
def bashcmd(cmds: list) -> str:
|
||||||
|
try:
|
||||||
|
logger.debug(f"running: {' '.join(cmds)}")
|
||||||
|
return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
|
||||||
|
except:
|
||||||
|
logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def get_highest_existing_git_version_tag() -> str:
|
||||||
|
"""Get highest versions from git tags depending on bump level"""
|
||||||
|
try:
|
||||||
|
git_tags = bashcmd(["git", "tag", "-l"]).split()
|
||||||
|
semver_compat_tags = list(filter(Version.is_valid, git_tags))
|
||||||
|
highest_git_version_tag = max(semver_compat_tags, key=semver.version.Version.parse)
|
||||||
|
logger.info(f"Highest git version tag: {highest_git_version_tag}")
|
||||||
|
return highest_git_version_tag
|
||||||
|
except:
|
||||||
|
logger.warning("Error getting git version tags")
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def auto_bump_version() -> bool:
|
||||||
|
active = Path(".autoversion").is_file()
|
||||||
|
logger.debug(f"Automated version bump is set to '{active}'")
|
||||||
|
return active
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
poetry_project_version = bashcmd(["poetry", "version", "-s"])
|
||||||
|
|
||||||
|
logger.info(f"Poetry project version: {poetry_project_version}")
|
||||||
|
|
||||||
|
highest_git_version_tag = get_highest_existing_git_version_tag()
|
||||||
|
|
||||||
|
comparison_result = semver.compare(poetry_project_version, highest_git_version_tag)
|
||||||
|
|
||||||
|
if comparison_result in (-1, 0):
|
||||||
|
logger.warning("Poetry version must be greater than git tag version.")
|
||||||
|
if auto_bump_version():
|
||||||
|
logger.info(bashcmd(["poetry", "version", highest_git_version_tag]))
|
||||||
|
sys.exit(0)
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
logger.info(f"All good: {poetry_project_version} > {highest_git_version_tag}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
72
.pre-commit-config.yaml
Normal file
72
.pre-commit-config.yaml
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# See https://pre-commit.com for more information
|
||||||
|
# See https://pre-commit.com/hooks.html for more hooks
|
||||||
|
exclude: ^(docs/|notebooks/|data/|src/configs/|tests/|.hooks/|bom.json)
|
||||||
|
default_language_version:
|
||||||
|
python: python3.10
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v5.0.0
|
||||||
|
hooks:
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: check-yaml
|
||||||
|
args: [--unsafe] # needed for .gitlab-ci.yml
|
||||||
|
- id: check-toml
|
||||||
|
- id: detect-private-key
|
||||||
|
- id: check-added-large-files
|
||||||
|
args: ['--maxkb=10000']
|
||||||
|
- id: check-case-conflict
|
||||||
|
- id: mixed-line-ending
|
||||||
|
|
||||||
|
# - repo: https://github.com/pre-commit/mirrors-pylint
|
||||||
|
# rev: v3.0.0a5
|
||||||
|
# hooks:
|
||||||
|
# - id: pylint
|
||||||
|
# args:
|
||||||
|
# - --disable=C0111,R0903,E0401
|
||||||
|
# - --max-line-length=120
|
||||||
|
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-isort
|
||||||
|
rev: v5.10.1
|
||||||
|
hooks:
|
||||||
|
- id: isort
|
||||||
|
args:
|
||||||
|
- --profile black
|
||||||
|
|
||||||
|
- repo: https://github.com/psf/black
|
||||||
|
rev: 24.10.0
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
# exclude: ^(docs/|notebooks/|data/|src/secrets/)
|
||||||
|
args:
|
||||||
|
- --line-length=120
|
||||||
|
|
||||||
|
- repo: https://github.com/compilerla/conventional-pre-commit
|
||||||
|
rev: v4.0.0
|
||||||
|
hooks:
|
||||||
|
- id: conventional-pre-commit
|
||||||
|
pass_filenames: false
|
||||||
|
stages: [commit-msg]
|
||||||
|
# args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test]
|
||||||
|
|
||||||
|
- repo: local
|
||||||
|
hooks:
|
||||||
|
- id: version-checker
|
||||||
|
name: version-checker
|
||||||
|
entry: python .hooks/poetry_version_check.py
|
||||||
|
language: python
|
||||||
|
always_run: true
|
||||||
|
additional_dependencies:
|
||||||
|
- "semver"
|
||||||
|
- "loguru"
|
||||||
|
|
||||||
|
# - repo: local
|
||||||
|
# hooks:
|
||||||
|
# - id: docker-build-test
|
||||||
|
# name: testing docker build
|
||||||
|
# entry: ./scripts/ops/docker-compose-build-run.sh
|
||||||
|
# language: script
|
||||||
|
# # always_run: true
|
||||||
|
# pass_filenames: false
|
||||||
|
# args: []
|
||||||
|
# stages: [pre-commit]
|
||||||
84
Dockerfile
84
Dockerfile
@ -1,30 +1,78 @@
|
|||||||
FROM python:3.10
|
###############
|
||||||
|
# BUILDER IMAGE
|
||||||
|
FROM python:3.10-slim as builder
|
||||||
|
|
||||||
RUN python -m venv /app/venv
|
ARG GITLAB_USER
|
||||||
ENV PATH="/app/venv/bin:$PATH"
|
ARG GITLAB_ACCESS_TOKEN
|
||||||
|
|
||||||
RUN python -m pip install --upgrade pip
|
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
|
||||||
|
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
|
||||||
|
|
||||||
WORKDIR /app/service
|
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
|
||||||
|
ARG POETRY_SOURCE_REF_RED=gitlab-red
|
||||||
|
|
||||||
COPY ./requirements.txt ./requirements.txt
|
ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
|
||||||
RUN python3 -m pip install -r requirements.txt
|
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
|
||||||
|
|
||||||
COPY ./incl/pyinfra/requirements.txt ./incl/pyinfra/requirements.txt
|
ARG VERSION=dev
|
||||||
RUN python -m pip install -r incl/pyinfra/requirements.txt
|
|
||||||
|
|
||||||
COPY ./incl/pdf2image/requirements.txt ./incl/pdf2image/requirements.txt
|
LABEL maintainer="Research <research@knecon.com>"
|
||||||
RUN python -m pip install -r incl/pdf2image/requirements.txt
|
LABEL version="${VERSION}"
|
||||||
|
|
||||||
COPY ./incl ./incl
|
WORKDIR /app
|
||||||
|
|
||||||
RUN python3 -m pip install -e incl/pyinfra
|
###########
|
||||||
RUN python3 -m pip install -e incl/pdf2image
|
# ENV SETUP
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=true
|
||||||
|
ENV PYTHONUNBUFFERED=true
|
||||||
|
ENV POETRY_HOME=/opt/poetry
|
||||||
|
ENV PATH="$POETRY_HOME/bin:$PATH"
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
RUN curl -sSL https://install.python-poetry.org | python3 -
|
||||||
|
RUN poetry --version
|
||||||
|
|
||||||
|
COPY pyproject.toml poetry.lock ./
|
||||||
|
|
||||||
|
RUN poetry config virtualenvs.create true && \
|
||||||
|
poetry config virtualenvs.in-project true && \
|
||||||
|
poetry config installer.max-workers 10 && \
|
||||||
|
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
|
||||||
|
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
|
||||||
|
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
|
||||||
|
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
|
||||||
|
poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
|
||||||
|
poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
|
||||||
|
poetry install --without=dev,docs,test -vv --no-interaction --no-root
|
||||||
|
|
||||||
|
##################
|
||||||
|
# COPY SOURCE CODE
|
||||||
|
COPY ./config ./config
|
||||||
COPY ./src ./src
|
COPY ./src ./src
|
||||||
COPY ./cv_analysis ./cv_analysis
|
|
||||||
COPY ./setup.py ./setup.py
|
|
||||||
|
|
||||||
RUN python3 -m pip install -e .
|
###############
|
||||||
|
# WORKING IMAGE
|
||||||
|
FROM python:3.10-slim
|
||||||
|
|
||||||
CMD ["python3", "-u", "src/serve.py"]
|
# COPY BILL OF MATERIALS (BOM)
|
||||||
|
COPY bom.json /bom.json
|
||||||
|
|
||||||
|
# COPY SOURCE CODE FROM BUILDER IMAGE
|
||||||
|
COPY --from=builder /app /app
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
ENV PATH="/app/.venv/bin:$PATH"
|
||||||
|
|
||||||
|
############
|
||||||
|
# NETWORKING
|
||||||
|
EXPOSE 5000
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
################
|
||||||
|
# LAUNCH COMMAND
|
||||||
|
CMD [ "python", "src/serve.py"]
|
||||||
|
|||||||
94
Makefile
Normal file
94
Makefile
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
.PHONY: \
|
||||||
|
poetry in-project-venv dev-env use-env install install-dev tests \
|
||||||
|
update-version sync-version-with-git \
|
||||||
|
docker docker-build-run docker-build docker-run \
|
||||||
|
docker-rm docker-rm-container docker-rm-image \
|
||||||
|
pre-commit get-licenses prep-commit \
|
||||||
|
docs sphinx_html sphinx_apidoc bom
|
||||||
|
.DEFAULT_GOAL := run
|
||||||
|
|
||||||
|
export DOCKER=docker
|
||||||
|
export DOCKERFILE=Dockerfile
|
||||||
|
export IMAGE_NAME=cv_analysis_service-image
|
||||||
|
export CONTAINER_NAME=cv_analysis_service-container
|
||||||
|
export HOST_PORT=9999
|
||||||
|
export CONTAINER_PORT=9999
|
||||||
|
export PYTHON_VERSION=python3.10
|
||||||
|
|
||||||
|
# all commands should be executed in the root dir of the project,
|
||||||
|
# specific environments should be deactivated
|
||||||
|
|
||||||
|
poetry: in-project-venv use-env dev-env
|
||||||
|
|
||||||
|
in-project-venv:
|
||||||
|
poetry config virtualenvs.in-project true
|
||||||
|
|
||||||
|
use-env:
|
||||||
|
poetry env use ${PYTHON_VERSION}
|
||||||
|
|
||||||
|
dev-env:
|
||||||
|
poetry install --with dev && poetry update
|
||||||
|
|
||||||
|
install:
|
||||||
|
poetry add $(pkg)
|
||||||
|
|
||||||
|
install-dev:
|
||||||
|
poetry add --dev $(pkg)
|
||||||
|
|
||||||
|
requirements:
|
||||||
|
poetry export --without-hashes --output requirements.txt
|
||||||
|
|
||||||
|
update-version:
|
||||||
|
poetry version prerelease
|
||||||
|
|
||||||
|
sync-version-with-git:
|
||||||
|
git pull -p && poetry version $(git rev-list --tags --max-count=1 | git describe --tags --abbrev=0)
|
||||||
|
|
||||||
|
bom:
|
||||||
|
cyclonedx-py poetry -o bom.json
|
||||||
|
|
||||||
|
docker: docker-rm docker-build-run
|
||||||
|
|
||||||
|
docker-build-run: docker-build docker-run
|
||||||
|
|
||||||
|
docker-build:
|
||||||
|
$(DOCKER) build \
|
||||||
|
--no-cache --progress=plain \
|
||||||
|
-t $(IMAGE_NAME) -f $(DOCKERFILE) \
|
||||||
|
--build-arg USERNAME=${USERNAME} \
|
||||||
|
--build-arg TOKEN=${GITLAB_TOKEN} \
|
||||||
|
.
|
||||||
|
|
||||||
|
docker-run:
|
||||||
|
$(DOCKER) run -it --rm -p $(HOST_PORT):$(CONTAINER_PORT)/tcp --name $(CONTAINER_NAME) $(IMAGE_NAME)
|
||||||
|
|
||||||
|
docker-rm: docker-rm-container docker-rm-image
|
||||||
|
|
||||||
|
docker-rm-container:
|
||||||
|
-$(DOCKER) rm $(CONTAINER_NAME)
|
||||||
|
|
||||||
|
docker-rm-image:
|
||||||
|
-$(DOCKER) image rm $(IMAGE_NAME)
|
||||||
|
|
||||||
|
tests:
|
||||||
|
poetry run pytest ./tests
|
||||||
|
|
||||||
|
prep-commit:
|
||||||
|
docs get-license sync-version-with-git update-version pre-commit
|
||||||
|
|
||||||
|
pre-commit:
|
||||||
|
pre-commit run --all-files
|
||||||
|
|
||||||
|
get-licenses:
|
||||||
|
pip-licenses --format=json --order=license --with-urls > pkg-licenses.json
|
||||||
|
|
||||||
|
docs: sphinx_apidoc sphinx_html
|
||||||
|
|
||||||
|
sphinx_html:
|
||||||
|
poetry run sphinx-build -b html docs/source/ docs/build/html -E -a
|
||||||
|
|
||||||
|
sphinx_apidoc:
|
||||||
|
cp ./README.md ./docs/source/README.md && cp -r ./data ./docs/source/data/ && poetry run sphinx-apidoc ./src -o ./docs/source/modules --no-toc --module-first --follow-links --separate --force
|
||||||
|
|
||||||
|
bom:
|
||||||
|
cyclonedx-py poetry -o bom.json
|
||||||
57
README.md
57
README.md
@ -1,8 +1,60 @@
|
|||||||
# cv-analysis — Visual (CV-Based) Document Parsing
|
# cv-analysis - Visual (CV-Based) Document Parsing
|
||||||
|
|
||||||
|
parse_pdf()
|
||||||
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
|
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
|
||||||
previous redactions in documents.
|
previous redactions in documents.
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
Input message:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"targetFilePath": {
|
||||||
|
"pdf": "absolute file path",
|
||||||
|
"vlp_output": "absolute file path"
|
||||||
|
},
|
||||||
|
"responseFilePath": "absolute file path",
|
||||||
|
"operation": "table_image_inference"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Response is uploaded to the storage as specified in the `responseFilePath` field. The structure is as follows:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
...,
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
'pageNum': 0,
|
||||||
|
'bbox': {
|
||||||
|
'x1': 55.3407,
|
||||||
|
'y1': 247.0246,
|
||||||
|
'x2': 558.5602,
|
||||||
|
'y2': 598.0585
|
||||||
|
},
|
||||||
|
'uuid': '2b10c1a2-393c-4fca-b9e3-0ad5b774ac84',
|
||||||
|
'label': 'table',
|
||||||
|
'tableLines': [
|
||||||
|
{
|
||||||
|
'x1': 0,
|
||||||
|
'y1': 16,
|
||||||
|
'x2': 1399,
|
||||||
|
'y2': 16
|
||||||
|
},
|
||||||
|
...
|
||||||
|
],
|
||||||
|
'imageInfo': {
|
||||||
|
'height': 693,
|
||||||
|
'width': 1414
|
||||||
|
}
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@ -31,10 +83,9 @@ The below snippet shows how to find the outlines of previous redactions.
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
from cv_analysis.redaction_detection import find_redactions
|
from cv_analysis.redaction_detection import find_redactions
|
||||||
import pdf2image
|
import pdf2image
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
pdf_path = ...
|
pdf_path = ...
|
||||||
page_index = ...
|
page_index = ...
|
||||||
|
|
||||||
|
|||||||
@ -1,40 +0,0 @@
|
|||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
|
||||||
<modelVersion>4.0.0</modelVersion>
|
|
||||||
|
|
||||||
<parent>
|
|
||||||
<groupId>com.atlassian.bamboo</groupId>
|
|
||||||
<artifactId>bamboo-specs-parent</artifactId>
|
|
||||||
<version>7.1.2</version>
|
|
||||||
<relativePath/>
|
|
||||||
</parent>
|
|
||||||
|
|
||||||
<artifactId>bamboo-specs</artifactId>
|
|
||||||
<version>1.0.0-SNAPSHOT</version>
|
|
||||||
<packaging>jar</packaging>
|
|
||||||
|
|
||||||
<properties>
|
|
||||||
<sonar.skip>true</sonar.skip>
|
|
||||||
</properties>
|
|
||||||
|
|
||||||
<dependencies>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.atlassian.bamboo</groupId>
|
|
||||||
<artifactId>bamboo-specs-api</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.atlassian.bamboo</groupId>
|
|
||||||
<artifactId>bamboo-specs</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Test dependencies -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>junit</groupId>
|
|
||||||
<artifactId>junit</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
|
||||||
|
|
||||||
<!-- run 'mvn test' to perform offline validation of the plan -->
|
|
||||||
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
|
|
||||||
</project>
|
|
||||||
@ -1,178 +0,0 @@
|
|||||||
package buildjob;
|
|
||||||
|
|
||||||
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
|
|
||||||
|
|
||||||
import java.time.LocalTime;
|
|
||||||
|
|
||||||
import com.atlassian.bamboo.specs.api.BambooSpec;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.BambooKey;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.plan.Job;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.project.Project;
|
|
||||||
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
|
|
||||||
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
|
|
||||||
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
|
|
||||||
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
|
|
||||||
import com.atlassian.bamboo.specs.builders.task.CleanWorkingDirectoryTask;
|
|
||||||
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
|
|
||||||
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
|
|
||||||
import com.atlassian.bamboo.specs.builders.trigger.ScheduledTrigger;
|
|
||||||
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.Variable;
|
|
||||||
import com.atlassian.bamboo.specs.util.BambooServer;
|
|
||||||
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
|
|
||||||
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Plan configuration for Bamboo.
|
|
||||||
* Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
|
|
||||||
*/
|
|
||||||
@BambooSpec
|
|
||||||
public class PlanSpec {
|
|
||||||
|
|
||||||
private static final String SERVICE_NAME = "cv-analysis";
|
|
||||||
|
|
||||||
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run main to publish plan on Bamboo
|
|
||||||
*/
|
|
||||||
public static void main(final String[] args) throws Exception {
|
|
||||||
//By default credentials are read from the '.credentials' file.
|
|
||||||
BambooServer bambooServer = new BambooServer("http://localhost:8085");
|
|
||||||
|
|
||||||
Plan plan = new PlanSpec().createDockerBuildPlan();
|
|
||||||
bambooServer.publish(plan);
|
|
||||||
PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
|
|
||||||
bambooServer.publish(planPermission);
|
|
||||||
|
|
||||||
Plan secPlan = new PlanSpec().createSecBuild();
|
|
||||||
bambooServer.publish(secPlan);
|
|
||||||
PlanPermissions secPlanPermission = new PlanSpec().createPlanPermission(secPlan.getIdentifier());
|
|
||||||
bambooServer.publish(secPlanPermission);
|
|
||||||
}
|
|
||||||
|
|
||||||
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
|
|
||||||
Permissions permission = new Permissions()
|
|
||||||
.userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
|
|
||||||
.groupPermissions("research", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
|
||||||
.groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
|
||||||
.groupPermissions("QA", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
|
||||||
.loggedInUserPermissions(PermissionType.VIEW)
|
|
||||||
.anonymousUserPermissionView();
|
|
||||||
return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Project project() {
|
|
||||||
return new Project()
|
|
||||||
.name("RED")
|
|
||||||
.key(new BambooKey("RED"));
|
|
||||||
}
|
|
||||||
|
|
||||||
public Plan createDockerBuildPlan() {
|
|
||||||
return new Plan(
|
|
||||||
project(),
|
|
||||||
SERVICE_NAME, new BambooKey(SERVICE_KEY))
|
|
||||||
// .description("Docker build for cv-analysis.")
|
|
||||||
// .variables()
|
|
||||||
.stages(new Stage("Build Stage")
|
|
||||||
.jobs(
|
|
||||||
new Job("Build Job", new BambooKey("BUILD"))
|
|
||||||
.tasks(
|
|
||||||
new CleanWorkingDirectoryTask()
|
|
||||||
.description("Clean working directory.")
|
|
||||||
.enabled(true),
|
|
||||||
new VcsCheckoutTask()
|
|
||||||
.description("Checkout default repository.")
|
|
||||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
|
||||||
new ScriptTask()
|
|
||||||
.description("Set config and keys.")
|
|
||||||
.location(Location.FILE)
|
|
||||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
|
|
||||||
new ScriptTask()
|
|
||||||
.description("Build Docker container.")
|
|
||||||
.location(Location.FILE)
|
|
||||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/docker-build.sh")
|
|
||||||
.argument(SERVICE_NAME),
|
|
||||||
new InjectVariablesTask()
|
|
||||||
.description("Inject git tag.")
|
|
||||||
.path("git.tag")
|
|
||||||
.namespace("g")
|
|
||||||
.scope(InjectVariablesScope.LOCAL),
|
|
||||||
new VcsTagTask()
|
|
||||||
.description("${bamboo.g.gitTag}")
|
|
||||||
.tagName("${bamboo.g.gitTag}")
|
|
||||||
.defaultRepository())
|
|
||||||
.dockerConfiguration(
|
|
||||||
new DockerConfiguration()
|
|
||||||
.image("nexus.iqser.com:5001/infra/release_build:4.5.0")
|
|
||||||
.volume("/var/run/docker.sock", "/var/run/docker.sock")),
|
|
||||||
new Job("Licence Job", new BambooKey("LICENCE"))
|
|
||||||
.enabled(false)
|
|
||||||
.tasks(
|
|
||||||
new VcsCheckoutTask()
|
|
||||||
.description("Checkout default repository.")
|
|
||||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
|
||||||
new ScriptTask()
|
|
||||||
.description("Build licence.")
|
|
||||||
.location(Location.FILE)
|
|
||||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/create-licence.sh"))
|
|
||||||
.dockerConfiguration(
|
|
||||||
new DockerConfiguration()
|
|
||||||
.image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
|
|
||||||
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
|
|
||||||
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
|
|
||||||
.linkedRepositories("RR / " + SERVICE_NAME)
|
|
||||||
.triggers(
|
|
||||||
new BitbucketServerTrigger())
|
|
||||||
.planBranchManagement(
|
|
||||||
new PlanBranchManagement()
|
|
||||||
.createForVcsBranch()
|
|
||||||
.delete(
|
|
||||||
new BranchCleanup()
|
|
||||||
.whenInactiveInRepositoryAfterDays(14))
|
|
||||||
.notificationForCommitters());
|
|
||||||
}
|
|
||||||
|
|
||||||
public Plan createSecBuild() {
|
|
||||||
return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
|
|
||||||
.stages(new Stage("Default Stage").jobs(
|
|
||||||
new Job("Sonar Job", new BambooKey("SONAR"))
|
|
||||||
.tasks(
|
|
||||||
new CleanWorkingDirectoryTask()
|
|
||||||
.description("Clean working directory.")
|
|
||||||
.enabled(true),
|
|
||||||
new VcsCheckoutTask()
|
|
||||||
.description("Checkout default repository.")
|
|
||||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
|
||||||
new ScriptTask()
|
|
||||||
.description("Set config and keys.")
|
|
||||||
.location(Location.FILE)
|
|
||||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
|
|
||||||
new ScriptTask()
|
|
||||||
.description("Run Sonarqube scan.")
|
|
||||||
.location(Location.FILE)
|
|
||||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-scan.sh")
|
|
||||||
.argument(SERVICE_NAME))
|
|
||||||
.dockerConfiguration(
|
|
||||||
new DockerConfiguration()
|
|
||||||
.image("nexus.iqser.com:5001/infra/release_build:4.2.0")
|
|
||||||
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
|
|
||||||
.linkedRepositories("RR / " + SERVICE_NAME)
|
|
||||||
.triggers(
|
|
||||||
new ScheduledTrigger()
|
|
||||||
.scheduleOnceDaily(LocalTime.of(23, 00)))
|
|
||||||
.planBranchManagement(
|
|
||||||
new PlanBranchManagement()
|
|
||||||
.createForVcsBranchMatching("release.*")
|
|
||||||
.notificationForCommitters());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,19 +0,0 @@
|
|||||||
#!/bin/bash
# Sets the Maven project version to ${bamboo_version_tag} and deploys the build
# to the release repository. Nothing is done when the version tag is "dev".
set -e

if [[ "${bamboo_version_tag}" != "dev" ]]
then
    # Quoted so that paths containing spaces survive word splitting.
    mvn_bin="${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn"
    pom_file="${bamboo_build_working_directory}/pom.xml"

    "${mvn_bin}" \
        -f "${pom_file}" \
        versions:set \
        -DnewVersion="${bamboo_version_tag}"

    # NOTE(review): the three maven.wagon.http.ssl.* flags below disable TLS
    # certificate validation for the deploy; kept as-is, confirm this is intended.
    "${mvn_bin}" \
        -f "${pom_file}" \
        -B clean deploy \
        -e -DdeployAtEnd=true \
        -Dmaven.wagon.http.ssl.insecure=true \
        -Dmaven.wagon.http.ssl.allowall=true \
        -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
        -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
fi
|
|
||||||
@ -1,53 +0,0 @@
|
|||||||
#!/bin/bash
# Builds the service Docker image.
#
# Usage: docker-build.sh SERVICE_NAME
#
# The version is derived from the branch:
#   * master          -> next minor version after the latest matching git tag
#   * release*        -> next patch version after the latest matching git tag
#   * version_tag set -> the explicit ${bamboo_version_tag}
#   * otherwise       -> dev build: image is built but neither tagged nor pushed
# The chosen version is written to git.tag as "gitTag=<version>".
set -e

SERVICE_NAME=$1
REGISTRY="nexus.iqser.com:5001"
dev_build=false

# Creates the virtualenv, pulls the DVC-tracked data and appends the package
# index credentials to pip.conf (shared by dev and release builds).
prepare_build_context() {
    python3 -m venv build_venv
    source build_venv/bin/activate
    python3 -m pip install --upgrade pip

    pip install dvc
    pip install 'dvc[ssh]'
    dvc pull

    echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
}

# Logs in to the registry; the password is passed on stdin, not on the command line.
registry_login() {
    echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin "${REGISTRY}"
}

if [[ "$bamboo_planRepository_branchName" == "master" ]]
then
    branchVersion=$(grep -Eo "version: .*" version.yaml | sed -s 's|version: \(.*\)\..*\..*|\1|g')
    # The tag list is intentionally left unquoted: each tag is one semver argument.
    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
    newVersion="$(semver $latestVersion -p -i minor)"
    echo "new release on master with version $newVersion"
elif [[ "$bamboo_planRepository_branchName" == release* ]]
then
    branchVersion=$(echo "$bamboo_planRepository_branchName" | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
    newVersion="$(semver $latestVersion -p -i patch)"
    echo "new release on $bamboo_planRepository_branchName with version $newVersion"
elif [[ "${bamboo_version_tag}" != "dev" ]]
then
    newVersion="${bamboo_version_tag}"
    echo "new special version bild with $newVersion"
else
    newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
    dev_build=true
fi

echo "gitTag=${newVersion}" > git.tag

if [[ "${dev_build}" == true ]]
then
    echo "dev build with tag ${newVersion}"
fi

prepare_build_context
# Log in before building in every path, as the dev build always did.
registry_login

if [[ "${dev_build}" == true ]]
then
    # Dev builds only verify that the image builds; nothing is tagged or pushed.
    docker build -f Dockerfile .
    exit 0
fi

docker build -f Dockerfile -t "${REGISTRY}/red/${SERVICE_NAME}:${newVersion}" .
docker push "${REGISTRY}/red/${SERVICE_NAME}:${newVersion}"
|
|
||||||
@ -1,8 +0,0 @@
|
|||||||
#!/bin/bash
# Installs the agent's SSH key (base64-encoded in ${bamboo_agent_ssh}) and a
# minimal SSH client config for vector.iqser.com.
set -e

# Make sure nothing written below is ever group- or world-readable.
umask 077

mkdir -p ~/.ssh
chmod 700 ~/.ssh

# Overwrite instead of append: appending on a reused agent would concatenate
# the key onto an existing one and leave an unusable key file.
echo "${bamboo_agent_ssh}" | base64 -d > ~/.ssh/id_rsa

echo "host vector.iqser.com" > ~/.ssh/config
echo " user bamboo-agent" >> ~/.ssh/config

chmod 600 ~/.ssh/config ~/.ssh/id_rsa
|
|
||||||
@ -1,67 +0,0 @@
|
|||||||
#!/bin/bash
# Runs the unit tests with coverage, an OWASP dependency-check and finally the
# SonarQube scan.
#
# Usage: sonar-scan.sh SERVICE_NAME
#
# A branch analysis is run unless ${bamboo_repository_pr_key} is set, in which
# case a pull-request analysis is run instead.
set -e

export JAVA_HOME=/usr/bin/sonar-scanner/jre

SERVICE_NAME=$1

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

echo "dev setup for unit test and coverage"

pip install -e incl/pyinfra
pip install -r incl/pyinfra/requirements.txt

pip install -e incl/pdf2image
pip install -r incl/pdf2image/requirements.txt

pip install -e .
pip install -r requirements.txt

echo "DVC pull step"
dvc pull

# Created up front so the coverage report can be written into it.
mkdir -p reports

echo "coverage calculation"
coverage run -m pytest
echo "coverage report generation"
coverage report -m
# Write the XML report to the path handed to the scanner below
# (sonar.python.coverage.reportPaths); the default output is ./coverage.xml.
coverage xml -o reports/coverage.xml

echo "dependency-check:aggregate"
dependency-check --enableExperimental -f JSON -f HTML -f XML \
    --disableAssembly -s . -o reports --project "$SERVICE_NAME" --exclude ".git/**" --exclude "venv/**" \
    --exclude "build_venv/**" --exclude "**/__pycache__/**"

# Arguments shared by the branch and the pull-request analysis.
scanner_args=(
    "-Dsonar.projectKey=RED_${SERVICE_NAME}"
    "-Dsonar.sources=src,cv_analysis"
    "-Dsonar.host.url=https://sonarqube.iqser.com"
    "-Dsonar.login=${bamboo_sonarqube_api_token_secret}"
    "-Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json"
    "-Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml"
    "-Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html"
    "-Dsonar.python.coverage.reportPaths=reports/coverage.xml"
)

if [[ -z "${bamboo_repository_pr_key}" ]]
then
    echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
    # -X (debug output) is only enabled for branch analyses, as before.
    /usr/bin/sonar-scanner/bin/sonar-scanner -X \
        "${scanner_args[@]}" \
        "-Dsonar.branch.name=${bamboo_planRepository_1_branch}"
else
    echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
    /usr/bin/sonar-scanner/bin/sonar-scanner \
        "${scanner_args[@]}" \
        "-Dsonar.pullrequest.key=${bamboo_repository_pr_key}" \
        "-Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch}" \
        "-Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch}"
fi
|
|
||||||
@ -1,22 +0,0 @@
|
|||||||
package buildjob;
|
|
||||||
|
|
||||||
|
|
||||||
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
|
|
||||||
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
|
|
||||||
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class PlanSpecTest {
|
|
||||||
@Test
|
|
||||||
public void checkYourPlanOffline() throws PropertiesValidationException {
|
|
||||||
Plan plan = new PlanSpec().createDockerBuildPlan();
|
|
||||||
|
|
||||||
EntityPropertiesBuilders.build(plan);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void checkYourSecPlanOffline() throws PropertiesValidationException {
|
|
||||||
Plan secPlan = new PlanSpec().createSecBuild();
|
|
||||||
EntityPropertiesBuilders.build(secPlan);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
67
config/pyinfra.toml
Normal file
67
config/pyinfra.toml
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
|
||||||
|
[asyncio]
|
||||||
|
max_concurrent_tasks = 10
|
||||||
|
|
||||||
|
[dynamic_tenant_queues]
|
||||||
|
enabled = true
|
||||||
|
|
||||||
|
[metrics.prometheus]
|
||||||
|
enabled = true
|
||||||
|
prefix = "redactmanager_cv_analysis_service"
|
||||||
|
|
||||||
|
[tracing]
|
||||||
|
enabled = true
|
||||||
|
# possible values "opentelemetry" | "azure_monitor" (Expects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
|
||||||
|
type = "azure_monitor"
|
||||||
|
|
||||||
|
[tracing.opentelemetry]
|
||||||
|
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
|
||||||
|
service_name = "redactmanager_cv_analysis_service"
|
||||||
|
exporter = "otlp"
|
||||||
|
|
||||||
|
[webserver]
|
||||||
|
host = "0.0.0.0"
|
||||||
|
port = 8080
|
||||||
|
|
||||||
|
[rabbitmq]
|
||||||
|
host = "localhost"
|
||||||
|
port = 5672
|
||||||
|
username = ""
|
||||||
|
password = ""
|
||||||
|
heartbeat = 60
|
||||||
|
# Has to be a divisor of heartbeat, and shouldn't be too big, since queue interactions (like receiving new messages) only happen at these intervals
|
||||||
|
# This is also the minimum time the service needs to process a message
|
||||||
|
connection_sleep = 5
|
||||||
|
input_queue = "request_queue"
|
||||||
|
output_queue = "response_queue"
|
||||||
|
dead_letter_queue = "dead_letter_queue"
|
||||||
|
|
||||||
|
tenant_event_queue_suffix = "_tenant_event_queue"
|
||||||
|
tenant_event_dlq_suffix = "_tenant_events_dlq"
|
||||||
|
tenant_exchange_name = "tenants-exchange"
|
||||||
|
queue_expiration_time = 300000 # 5 minutes in milliseconds
|
||||||
|
service_request_queue_prefix = "cv_analysis_request_queue"
|
||||||
|
service_request_exchange_name = "cv_analysis_request_exchange"
|
||||||
|
service_response_exchange_name = "cv_analysis_response_exchange"
|
||||||
|
service_dlq_name = "cv_analysis_dlq"
|
||||||
|
|
||||||
|
[storage]
|
||||||
|
backend = "s3"
|
||||||
|
|
||||||
|
[storage.s3]
|
||||||
|
bucket = "redaction"
|
||||||
|
endpoint = "http://127.0.0.1:9000"
|
||||||
|
key = ""
|
||||||
|
secret = ""
|
||||||
|
region = "eu-central-1"
|
||||||
|
|
||||||
|
[storage.azure]
|
||||||
|
container = "redaction"
|
||||||
|
connection_string = ""
|
||||||
|
|
||||||
|
[storage.tenant_server]
|
||||||
|
public_key = ""
|
||||||
|
endpoint = "http://tenant-user-management:8081/internal-api/tenants"
|
||||||
|
|
||||||
|
[kubernetes]
|
||||||
|
pod_name = "test_pod"
|
||||||
19
config/settings.toml
Normal file
19
config/settings.toml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
[logging]
|
||||||
|
level = "INFO"
|
||||||
|
visual_logging_level = "DISABLED"
|
||||||
|
visual_logging_output_folder = "/tmp/debug"
|
||||||
|
|
||||||
|
[table_parsing]
|
||||||
|
skip_pages_without_images = true
|
||||||
|
|
||||||
|
[paths]
|
||||||
|
root = "@format {env[ROOT_PATH]}"
|
||||||
|
dvc_data_dir = "${paths.root}/data"
|
||||||
|
pdf_for_testing = "${paths.dvc_data_dir}/pdfs_for_testing"
|
||||||
|
png_for_testing = "${paths.dvc_data_dir}/pngs_for_testing"
|
||||||
|
png_figures_detected = "${paths.png_for_testing}/figures_detected"
|
||||||
|
png_tables_detected = "${paths.png_for_testing}/tables_detected_by_tp"
|
||||||
|
hashed_pdfs_for_testing = "${paths.pdf_for_testing}/hashed"
|
||||||
|
metadata_test_files = "${paths.dvc_data_dir}/metadata_testing_files.csv"
|
||||||
|
test_dir = "${paths.dvc_data_dir}/test"
|
||||||
|
test_data_dir = "${paths.dvc_data_dir}/test/test_data"
|
||||||
@ -1,30 +0,0 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
def get_config():
    """Create and return a new ``Config`` instance."""
    settings = Config()
    return settings
|
|
||||||
|
|
||||||
|
|
||||||
class Config:
    """Logging settings and data/test locations used by the package.

    Every setting is a plain instance attribute; ``config["name"]`` is an
    alternative spelling of ``config.name``.
    """

    def __init__(self):
        # Root log level, overridable through the LOGGING_LEVEL_ROOT environment variable.
        self.logging_level_root = os.environ.get("LOGGING_LEVEL_ROOT", "INFO")

        # visual_logging_level ordering per the original note: NOTHING > INFO > DEBUG > ALL
        # NOTE(review): the default below is "DISABLED", which is not one of the
        # names in that list -- confirm which spelling the visual logger expects.
        self.visual_logging_level = "DISABLED"
        self.visual_logging_output_folder = "/tmp/debug"

        # locations
        # FIXME: is everything here necessary?
        # The root is the parent of the directory that contains this file.
        root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        self.dvc_data_dir = os.path.join(root, "data")
        self.pdf_for_testing = os.path.join(self.dvc_data_dir, "pdfs_for_testing")
        self.png_for_testing = os.path.join(self.dvc_data_dir, "pngs_for_testing")
        self.png_figures_detected = os.path.join(self.png_for_testing, "figures_detected")
        self.png_tables_detected = os.path.join(self.png_for_testing, "tables_detected_by_tp")
        self.hashed_pdfs_for_testing = os.path.join(self.pdf_for_testing, "hashed")
        self.metadata_test_files = os.path.join(self.dvc_data_dir, "metadata_testing_files.csv")
        self.test_dir = os.path.join(root, "test")
        self.test_data_dir = os.path.join(self.test_dir, "test_data")

    def __getitem__(self, key):
        """Return the attribute named ``key``.

        Raises:
            AttributeError: if no such attribute exists (not ``KeyError``).
        """
        return getattr(self, key)
|
|
||||||
@ -1,56 +0,0 @@
|
|||||||
from dataclasses import asdict
|
|
||||||
from operator import truth
|
|
||||||
|
|
||||||
from funcy import lmap, flatten
|
|
||||||
|
|
||||||
from cv_analysis.figure_detection.figure_detection import detect_figures
|
|
||||||
from cv_analysis.table_parsing import parse_tables
|
|
||||||
from cv_analysis.utils.structures import Rectangle
|
|
||||||
from pdf2img.conversion import convert_pages_to_images
|
|
||||||
from pdf2img.default_objects.image import ImagePlus, ImageInfo
|
|
||||||
from pdf2img.default_objects.rectangle import RectanglePlus
|
|
||||||
|
|
||||||
|
|
||||||
def get_analysis_pipeline(operation):
    """Return the analysis pipeline for the requested operation.

    Args:
        operation: ``"table"`` for table parsing or ``"figure"`` for figure
            detection.

    Returns:
        The pipeline callable built by ``make_analysis_pipeline`` (200 dpi).

    Raises:
        ValueError: if ``operation`` is neither ``"table"`` nor ``"figure"``.
    """
    if operation == "table":
        return make_analysis_pipeline(parse_tables, table_parsing_formatter, dpi=200)
    if operation == "figure":
        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200)
    # A bare ``raise`` has no active exception here and would only surface as
    # "RuntimeError: No active exception to reraise"; name the problem instead.
    raise ValueError(f"Unknown operation {operation!r}; expected 'table' or 'figure'.")
|
|
||||||
|
|
||||||
|
|
||||||
def make_analysis_pipeline(analysis_fn, formatter, dpi):
    """Build a generator function that analyses a PDF page by page.

    Args:
        analysis_fn: called with each page image (``page.asarray()``); returns
            the detected rectangles.
        formatter: called as ``formatter(rects, page, dpi)`` for every page on
            which ``analysis_fn`` returned something truthy.
        dpi: resolution handed to the page conversion and to ``formatter``.

    Returns:
        ``analyse_pipeline(pdf, index=None)``, a generator yielding the
        flattened formatter results; pages without results are skipped.
    """

    def analyse_pipeline(pdf: bytes, index=None):
        def parse_page(page: ImagePlus):
            found = analysis_fn(page.asarray())
            # Pages with no detections contribute None and are filtered out below.
            return formatter(found, page, dpi) if found else None

        page_images = convert_pages_to_images(pdf, index=index, dpi=dpi)
        per_page = map(parse_page, page_images)
        yield from flatten(filter(truth, per_page))

    return analyse_pipeline
|
|
||||||
|
|
||||||
|
|
||||||
def table_parsing_formatter(rects, page: ImagePlus, dpi):
    """Format the table cells detected on one page.

    Args:
        rects: detected cell rectangles (each provides ``xyxy()``).
        page: the page the rectangles were found on.
        dpi: resolution the page image was rendered at.

    Returns:
        dict with ``"pageInfo"`` (``page.asdict(natural_index=True)``) and
        ``"tableCells"`` (one derotated dict per rectangle).
    """

    def to_cell_dict(cell: Rectangle):
        converted = RectanglePlus.from_pixels(*cell.xyxy(), page.info, alpha=False, dpi=dpi)
        return converted.asdict(derotate=True)

    cells = [to_cell_dict(cell) for cell in rects]
    return {"pageInfo": page.asdict(natural_index=True), "tableCells": cells}
|
|
||||||
|
|
||||||
|
|
||||||
def figure_detection_formatter(rects, page, dpi):
    """Format the figures detected on one page.

    Args:
        rects: detected figure rectangles (each provides ``xyxy()``).
        page: the page the rectangles were found on.
        dpi: resolution the page image was rendered at.

    Returns:
        list with one ``asdict(ImageInfo(...))`` dict per rectangle; the
        bounding box is taken without derotation.
    """

    def to_image_info(figure: Rectangle):
        converted = RectanglePlus.from_pixels(*figure.xyxy(), page.info, alpha=False, dpi=dpi)
        info = ImageInfo(page.info, converted.asbbox(derotate=False), converted.alpha)
        return asdict(info)

    return [to_image_info(figure) for figure in rects]
|
|
||||||
@ -1,139 +0,0 @@
|
|||||||
from functools import partial
|
|
||||||
from itertools import chain, starmap
|
|
||||||
from operator import attrgetter
|
|
||||||
|
|
||||||
import cv2
|
|
||||||
import numpy as np
|
|
||||||
from funcy import lmap, lfilter
|
|
||||||
|
|
||||||
from cv_analysis.layout_parsing import parse_layout
|
|
||||||
from cv_analysis.utils.postprocessing import remove_isolated # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
|
|
||||||
from cv_analysis.utils.structures import Rectangle
|
|
||||||
from cv_analysis.utils.visual_logging import vizlogger
|
|
||||||
|
|
||||||
|
|
||||||
def add_external_contours(image, image_h_w_lines_only):
    """Draw the bounding box of every external contour onto ``image``.

    Args:
        image: image that is drawn on in place.
        image_h_w_lines_only: image whose external contours are searched.

    Returns:
        ``image``, with a 1px rectangle of value 255 per external contour.
    """
    outer_contours, _ = cv2.findContours(image_h_w_lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for left, top, width, height in map(cv2.boundingRect, outer_contours):
        cv2.rectangle(image, (left, top), (left + width, top + height), 255, 1)

    return image
|
|
||||||
|
|
||||||
|
|
||||||
def apply_motion_blur(image: np.array, angle, size=80):
    """Solidifies and slightly extends detected lines.

    A ``size`` x ``size`` kernel with a single row of ones is rotated by
    ``angle``, normalised to sum 1 and applied to the image. Each kernel stage
    is passed to the visual logger.

    Args:
        image (np.array): page image as array
        angle: direction in which to apply blur, 0 or 90
        size (int): kernel size; 80 found empirically to work well

    Returns:
        np.array

    """
    kernel = np.zeros((size, size), dtype=np.float32)
    vizlogger.debug(kernel, "tables08_blur_kernel1.png")

    # A single line of ones through the middle row.
    middle_row = (size - 1) // 2
    kernel[middle_row, :] = np.ones(size, dtype=np.float32)
    vizlogger.debug(kernel, "tables09_blur_kernel2.png")

    # Rotate that line about the kernel centre to the requested direction.
    centre = (size / 2 - 0.5, size / 2 - 0.5)
    rotation = cv2.getRotationMatrix2D(centre, angle, 1.0)
    kernel = cv2.warpAffine(kernel, rotation, (size, size))
    vizlogger.debug(kernel, "tables10_blur_kernel3.png")

    # Normalise so that the weights sum to one.
    kernel = kernel * (1.0 / np.sum(kernel))
    vizlogger.debug(kernel, "tables11_blur_kernel4.png")

    return cv2.filter2D(image, -1, kernel)
|
|
||||||
|
|
||||||
|
|
||||||
def isolate_vertical_and_horizontal_components(img_bin):
    """Identifies and reinforces horizontal and vertical lines in a binary image.

    Args:
        img_bin (np.array): array corresponding to single binarized page image

    Returns:
        np.array
    """
    # Morphological opening keeps only runs at least this many pixels long.
    line_min_width = 48
    kernel_h = np.ones((1, line_min_width), np.uint8)
    kernel_v = np.ones((line_min_width, 1), np.uint8)

    img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h)
    img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v)
    # Raw line mask, kept for the contour step at the end.
    img_lines_raw = img_bin_v | img_bin_h

    # Extend the lines along their own direction: dilation, then motion blur.
    kernel_h = np.ones((1, 30), np.uint8)
    kernel_v = np.ones((30, 1), np.uint8)
    img_bin_h = cv2.dilate(img_bin_h, kernel_h, iterations=2)
    img_bin_v = cv2.dilate(img_bin_v, kernel_v, iterations=2)

    img_bin_h = apply_motion_blur(img_bin_h, 0)
    img_bin_v = apply_motion_blur(img_bin_v, 90)

    img_bin_extended = img_bin_h | img_bin_v

    # Re-binarize after blurring; the returned threshold value is not needed.
    _, img_bin_extended = cv2.threshold(img_bin_extended, 120, 255, cv2.THRESH_BINARY)
    # NOTE(review): dilation with a 1x1 kernel looks like a no-op; kept unchanged.
    img_bin_final = cv2.dilate(img_bin_extended, np.ones((1, 1), np.uint8), iterations=1)
    # Contours come from the raw line mask, i.e. from the lines as they were
    # before dilation and blurring extended them.
    img_bin_final = add_external_contours(img_bin_final, img_lines_raw)

    return img_bin_final
|
|
||||||
|
|
||||||
|
|
||||||
def find_table_layout_boxes(image: np.array):
    """Map the layout boxes of a page to rectangles, keeping only large ones.

    Args:
        image (np.array): page image handed to ``parse_layout``.

    Returns:
        list: one entry per layout box, in order: a ``Rectangle`` when the box
        area ``w * h`` is at least 100000, otherwise ``None``.
    """

    def is_large_enough(box):
        (x, y, w, h) = box
        if w * h >= 100000:
            return Rectangle.from_xywh(box)
        # NOTE(review): boxes that are too small yield None entries in the
        # result; callers presumably filter them out -- confirm.
        return None

    layout_boxes = parse_layout(image)
    # The mapping used to be computed twice, with the first result discarded.
    return lmap(is_large_enough, layout_boxes)
|
|
||||||
|
|
||||||
|
|
||||||
def preprocess(image: np.array):
    """Return the inverted binarization of a page image.

    Images with more than two dimensions are converted from BGR to grayscale
    first. Pixels above 195 become 255 and all others 0, then the result is
    bitwise inverted.
    """
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binarized = cv2.threshold(image, 195, 255, cv2.THRESH_BINARY)
    return ~binarized
|
|
||||||
|
|
||||||
|
|
||||||
def turn_connected_components_into_rects(image: np.array):
    """Extract (x, y, w, h) boxes of the large connected components.

    Components are computed on the inverted image (8-connectivity) and kept
    only when area > 2000, width > 35 and height > 25.

    Returns:
        The kept stats without their area column and without the first two
        kept rows, or ``[]`` when no component passes the size filter.
    """

    def passes_size_filter(stat):
        _left, _top, width, height, area = stat
        return area > 2000 and width > 35 and height > 25

    component_stats = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)[2]

    kept = lfilter(passes_size_filter, component_stats)
    if not kept:
        return []

    stacked = np.vstack(kept)
    # Drop the area column, then the first two kept components.
    # NOTE(review): presumably those two are the background / page frame -- confirm.
    return stacked[:, :-1][2:]
|
|
||||||
|
|
||||||
|
|
||||||
def parse_tables(image: np.array, show=False):
    """Runs the full table parsing process.

    Args:
        image (np.array): single PDF page, converted to a numpy array
        show: not used by this function.

    Returns:
        list: list of rectangles corresponding to table cells
    """
    binarized = preprocess(image)
    line_mask = isolate_vertical_and_horizontal_components(binarized)
    cell_boxes = turn_connected_components_into_rects(line_mask)
    cells = [Rectangle.from_xywh(box) for box in cell_boxes]
    return remove_isolated(cells)
|
|
||||||
BIN
data/2017-1078223.pdf
Normal file
BIN
data/2017-1078223.pdf
Normal file
Binary file not shown.
BIN
data/2017-1078223.vlp_output.annotated.pdf
Normal file
BIN
data/2017-1078223.vlp_output.annotated.pdf
Normal file
Binary file not shown.
98825
data/2017-1078223.vlp_output.json
Normal file
98825
data/2017-1078223.vlp_output.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
data/table_inference_test_files.zip
Normal file
BIN
data/table_inference_test_files.zip
Normal file
Binary file not shown.
30
devenvsetup.sh
Normal file
30
devenvsetup.sh
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#!/bin/bash
# Sets up the local development environment.
#
# Arguments:
#   $1  python version to install and select through pyenv
#   $2  GitLab user name for the package registries
#   $3  GitLab personal access token for the package registries
python_version=$1
gitlab_user=$2
gitlab_personal_access_token=$3

# cookiecutter https://gitlab.knecon.com/knecon/research/template-python-project.git --checkout master
# latest_dir=$(ls -td -- */ | head -n 1) # should be the dir cookiecutter just created

# cd $latest_dir

pyenv install "$python_version"
pyenv local "$python_version"
pyenv shell "$python_version"

pip install --upgrade pip
pip install poetry

poetry config installer.max-workers 10
# research package registry
poetry config repositories.gitlab-research https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
# Credentials are quoted so special characters in the token are passed through unchanged.
poetry config http-basic.gitlab-research "${gitlab_user}" "${gitlab_personal_access_token}"
# redactmanager package registry
poetry config repositories.gitlab-red https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
poetry config http-basic.gitlab-red "${gitlab_user}" "${gitlab_personal_access_token}"

poetry env use "$(pyenv which python)"
poetry install --with=dev
poetry update

# NOTE(review): this only affects the caller's shell when the script is
# sourced rather than executed -- confirm the intended way of running it.
source .venv/bin/activate
|
||||||
@ -28,4 +28,4 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- /opt/bitnami/rabbitmq/.rabbitmq/:/data/bitnami
|
- /opt/bitnami/rabbitmq/.rabbitmq/:/data/bitnami
|
||||||
volumes:
|
volumes:
|
||||||
mdata:
|
mdata:
|
||||||
|
|||||||
4
docs/build/html/.buildinfo
vendored
Normal file
4
docs/build/html/.buildinfo
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Sphinx build info version 1
|
||||||
|
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
|
||||||
|
config: 04e9c6c5d3e412413c2949e598da60dc
|
||||||
|
tags: 645f666f9bcd5a90fca523b33c5a78b7
|
||||||
BIN
docs/build/html/.doctrees/README.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/README.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/environment.pickle
vendored
Normal file
BIN
docs/build/html/.doctrees/environment.pickle
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/index.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/index.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.config.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.config.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.figure_detection.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.figure_detection.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.figures.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.figures.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.text.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.figure_detection.text.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.layout_parsing.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.layout_parsing.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.locations.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.locations.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.redaction_detection.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.redaction_detection.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.server.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.server.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.server.pipeline.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.server.pipeline.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.table_inference.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.table_inference.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.table_parsing.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.table_parsing.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.annotate.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.annotate.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.banner.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.banner.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.connect_rects.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.connect_rects.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.display.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.display.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.draw.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.draw.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.filters.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.filters.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.image_extraction.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.image_extraction.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.open_pdf.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.open_pdf.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.postprocessing.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.postprocessing.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.preprocessing.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.preprocessing.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.structures.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.structures.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.test_metrics.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.test_metrics.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.utils.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.utils.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.visual_logging.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/cv_analysis.utils.visual_logging.doctree
vendored
Normal file
Binary file not shown.
BIN
docs/build/html/.doctrees/modules/serve.doctree
vendored
Normal file
BIN
docs/build/html/.doctrees/modules/serve.doctree
vendored
Normal file
Binary file not shown.
657
docs/build/html/README.html
vendored
Normal file
657
docs/build/html/README.html
vendored
Normal file
@ -0,0 +1,657 @@
|
|||||||
|
|
||||||
|
<!DOCTYPE html>
|
||||||
|
|
||||||
|
|
||||||
|
<html lang="en" data-content_root="./" >
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
|
||||||
|
<title>cv-analysis - Visual (CV-Based) Document Parsing — CV Analysis Service 2.5.2 documentation</title>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<script data-cfasync="false">
|
||||||
|
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||||||
|
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<!-- Loaded before other Sphinx assets -->
|
||||||
|
<link href="_static/styles/theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||||||
|
<link href="_static/styles/bootstrap.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||||||
|
<link href="_static/styles/pydata-sphinx-theme.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||||||
|
|
||||||
|
|
||||||
|
<link href="_static/vendor/fontawesome/6.5.1/css/all.min.css?digest=8d27b9dea8ad943066ae" rel="stylesheet" />
|
||||||
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-solid-900.woff2" />
|
||||||
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-brands-400.woff2" />
|
||||||
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.5.1/webfonts/fa-regular-400.woff2" />
|
||||||
|
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=a746c00c" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="https://assets.readthedocs.org/static/css/badge_only.css" />
|
||||||
|
|
||||||
|
<!-- Pre-loaded scripts that we'll load fully later -->
|
||||||
|
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae" />
|
||||||
|
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae" />
|
||||||
|
<script src="_static/vendor/fontawesome/6.5.1/js/all.min.js?digest=8d27b9dea8ad943066ae"></script>
|
||||||
|
|
||||||
|
<script src="_static/documentation_options.js?v=afc61bbc"></script>
|
||||||
|
<script src="_static/doctools.js?v=9a2dae69"></script>
|
||||||
|
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||||
|
<script>DOCUMENTATION_OPTIONS.pagename = 'README';</script>
|
||||||
|
<script async="async" src="https://assets.readthedocs.org/static/javascript/readthedocs-doc-embed.js"></script>
|
||||||
|
<link rel="index" title="Index" href="genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="search.html" />
|
||||||
|
<link rel="next" title="cv_analysis package" href="modules/cv_analysis.html" />
|
||||||
|
<link rel="prev" title="Welcome to CV Analysis Service documentation!" href="index.html" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||||
|
<meta name="docsearch:language" content="en"/>
|
||||||
|
|
||||||
|
<!-- RTD Extra Head -->
|
||||||
|
|
||||||
|
<link rel="stylesheet" href="https://assets.readthedocs.org/static/css/readthedocs-doc-embed.css" type="text/css" />
|
||||||
|
|
||||||
|
<script type="application/json" id="READTHEDOCS_DATA">{"ad_free": "", "api_host": "", "builder": "sphinx", "canonical_url": "", "docroot": "", "features": {"docsearch_disabled": false}, "global_analytics_code": null, "language": "", "page": "README", "programming_language": "", "project": "", "source_suffix": ".md", "subprojects": {}, "theme": "", "user_analytics_code": null, "version": ""}</script>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Using this variable directly instead of using `JSON.parse` is deprecated.
|
||||||
|
The READTHEDOCS_DATA global variable will be removed in the future.
|
||||||
|
-->
|
||||||
|
<script type="text/javascript">
|
||||||
|
READTHEDOCS_DATA = JSON.parse(document.getElementById('READTHEDOCS_DATA').innerHTML);
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<script type="text/javascript" src="https://assets.readthedocs.org/static/javascript/readthedocs-analytics.js" async="async"></script>
|
||||||
|
|
||||||
|
<!-- end RTD <extrahead> -->
|
||||||
|
</head>
|
||||||
|
|
||||||
|
|
||||||
|
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<a id="pst-skip-link" class="skip-link" href="#main-content">Skip to main content</a>
|
||||||
|
|
||||||
|
<div id="pst-scroll-pixel-helper"></div>
|
||||||
|
|
||||||
|
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||||||
|
<i class="fa-solid fa-arrow-up"></i>
|
||||||
|
Back to top
|
||||||
|
</button>
|
||||||
|
|
||||||
|
|
||||||
|
<input type="checkbox"
|
||||||
|
class="sidebar-toggle"
|
||||||
|
name="__primary"
|
||||||
|
id="__primary"/>
|
||||||
|
<label class="overlay overlay-primary" for="__primary"></label>
|
||||||
|
|
||||||
|
<input type="checkbox"
|
||||||
|
class="sidebar-toggle"
|
||||||
|
name="__secondary"
|
||||||
|
id="__secondary"/>
|
||||||
|
<label class="overlay overlay-secondary" for="__secondary"></label>
|
||||||
|
|
||||||
|
<div class="search-button__wrapper">
|
||||||
|
<div class="search-button__overlay"></div>
|
||||||
|
<div class="search-button__search-container">
|
||||||
|
<form class="bd-search d-flex align-items-center"
|
||||||
|
action="search.html"
|
||||||
|
method="get">
|
||||||
|
<i class="fa-solid fa-magnifying-glass"></i>
|
||||||
|
<input type="search"
|
||||||
|
class="form-control"
|
||||||
|
name="q"
|
||||||
|
id="search-input"
|
||||||
|
placeholder="Search the docs ..."
|
||||||
|
aria-label="Search the docs ..."
|
||||||
|
autocomplete="off"
|
||||||
|
autocorrect="off"
|
||||||
|
autocapitalize="off"
|
||||||
|
spellcheck="false"/>
|
||||||
|
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||||||
|
</form></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<header class="bd-header navbar navbar-expand-lg bd-navbar">
|
||||||
|
<div class="bd-header__inner bd-page-width">
|
||||||
|
<label class="sidebar-toggle primary-toggle" for="__primary">
|
||||||
|
<span class="fa-solid fa-bars"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="col-lg-3 navbar-header-items__start">
|
||||||
|
|
||||||
|
<div class="navbar-item">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<a class="navbar-brand logo" href="index.html">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<img src="_static/logo.png" class="logo__image only-light" alt="CV Analysis Service 2.5.2 documentation - Home"/>
|
||||||
|
<script>document.write(`<img src="_static/logo.png" class="logo__image only-dark" alt="CV Analysis Service 2.5.2 documentation - Home"/>`);</script>
|
||||||
|
|
||||||
|
|
||||||
|
</a></div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="col-lg-9 navbar-header-items">
|
||||||
|
|
||||||
|
<div class="me-auto navbar-header-items__center">
|
||||||
|
|
||||||
|
<div class="navbar-item">
|
||||||
|
<nav class="navbar-nav">
|
||||||
|
<ul class="bd-navbar-elements navbar-nav">
|
||||||
|
|
||||||
|
<li class="nav-item current active">
|
||||||
|
<a class="nav-link nav-internal" href="#">
|
||||||
|
cv-analysis - Visual (CV-Based) Document Parsing
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
|
||||||
|
cv_analysis package
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link nav-internal" href="modules/serve.html">
|
||||||
|
serve module
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav></div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="navbar-header-items__end">
|
||||||
|
|
||||||
|
<div class="navbar-item navbar-persistent--container">
|
||||||
|
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.write(`
|
||||||
|
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||||
|
<i class="fa-solid fa-magnifying-glass"></i>
|
||||||
|
<span class="search-button__default-text">Search</span>
|
||||||
|
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||||
|
</button>
|
||||||
|
`);
|
||||||
|
</script>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="navbar-item">
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.write(`
|
||||||
|
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||||
|
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
|
||||||
|
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
|
||||||
|
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
|
||||||
|
</button>
|
||||||
|
`);
|
||||||
|
</script></div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="navbar-persistent--mobile">
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.write(`
|
||||||
|
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||||
|
<i class="fa-solid fa-magnifying-glass"></i>
|
||||||
|
<span class="search-button__default-text">Search</span>
|
||||||
|
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||||||
|
</button>
|
||||||
|
`);
|
||||||
|
</script>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
|
||||||
|
<span class="fa-solid fa-outdent"></span>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</header>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="bd-container">
|
||||||
|
<div class="bd-container__inner bd-page-width">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="bd-sidebar-primary bd-sidebar">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="sidebar-header-items sidebar-primary__section">
|
||||||
|
|
||||||
|
|
||||||
|
<div class="sidebar-header-items__center">
|
||||||
|
|
||||||
|
<div class="navbar-item">
|
||||||
|
<nav class="navbar-nav">
|
||||||
|
<ul class="bd-navbar-elements navbar-nav">
|
||||||
|
|
||||||
|
<li class="nav-item current active">
|
||||||
|
<a class="nav-link nav-internal" href="#">
|
||||||
|
cv-analysis - Visual (CV-Based) Document Parsing
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link nav-internal" href="modules/cv_analysis.html">
|
||||||
|
cv_analysis package
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link nav-internal" href="modules/serve.html">
|
||||||
|
serve module
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
</nav></div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="sidebar-header-items__end">
|
||||||
|
|
||||||
|
<div class="navbar-item">
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.write(`
|
||||||
|
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||||||
|
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
|
||||||
|
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
|
||||||
|
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
|
||||||
|
</button>
|
||||||
|
`);
|
||||||
|
</script></div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||||||
|
<div class="sidebar-primary-item">
|
||||||
|
<nav class="bd-docs-nav bd-links"
|
||||||
|
aria-label="Section Navigation">
|
||||||
|
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
|
||||||
|
<div class="bd-toc-item navbar-nav"></div>
|
||||||
|
</nav></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="rtd-footer-container"></div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<main id="main-content" class="bd-main">
|
||||||
|
|
||||||
|
|
||||||
|
<div class="bd-content">
|
||||||
|
<div class="bd-article-container">
|
||||||
|
|
||||||
|
<div class="bd-header-article">
|
||||||
|
<div class="header-article-items header-article__inner">
|
||||||
|
|
||||||
|
<div class="header-article-items__start">
|
||||||
|
|
||||||
|
<div class="header-article-item">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<nav aria-label="Breadcrumb">
|
||||||
|
<ul class="bd-breadcrumbs">
|
||||||
|
|
||||||
|
<li class="breadcrumb-item breadcrumb-home">
|
||||||
|
<a href="index.html" class="nav-link" aria-label="Home">
|
||||||
|
<i class="fa-solid fa-home"></i>
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
<li class="breadcrumb-item active" aria-current="page">cv-analysis...</li>
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div id="searchbox"></div>
|
||||||
|
<article class="bd-article">
|
||||||
|
|
||||||
|
<section id="cv-analysis-visual-cv-based-document-parsing">
|
||||||
|
<h1>cv-analysis - Visual (CV-Based) Document Parsing<a class="headerlink" href="#cv-analysis-visual-cv-based-document-parsing" title="Link to this heading">#</a></h1>
|
||||||
|
<p>parse_pdf()
|
||||||
|
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
|
||||||
|
previous redactions in documents.</p>
|
||||||
|
<section id="api">
|
||||||
|
<h2>API<a class="headerlink" href="#api" title="Link to this heading">#</a></h2>
|
||||||
|
<p>Input message:</p>
|
||||||
|
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
|
||||||
|
<span class="w"> </span><span class="nt">"targetFilePath"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||||
|
<span class="w"> </span><span class="nt">"pdf"</span><span class="p">:</span><span class="w"> </span><span class="s2">"absolute file path"</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="nt">"vlp_output"</span><span class="p">:</span><span class="w"> </span><span class="s2">"absolute file path"</span>
|
||||||
|
<span class="w"> </span><span class="p">},</span>
|
||||||
|
<span class="w"> </span><span class="nt">"responseFilePath"</span><span class="p">:</span><span class="w"> </span><span class="s2">"absolute file path"</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="nt">"operation"</span><span class="p">:</span><span class="w"> </span><span class="s2">"table_image_inference"</span>
|
||||||
|
<span class="p">}</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Response is uploaded to the storage as specified in the <code class="docutils literal notranslate"><span class="pre">responseFilePath</span></code> field. The structure is as follows:</p>
|
||||||
|
<div class="highlight-json notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
|
||||||
|
<span class="w"> </span><span class="err">...</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="nt">"data"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||||||
|
<span class="w"> </span><span class="p">{</span>
|
||||||
|
<span class="w"> </span><span class="err">'pageNum'</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'bbox'</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||||
|
<span class="w"> </span><span class="err">'x</span><span class="mi">1</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mf">55.3407</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'y</span><span class="mi">1</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mf">247.0246</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'x</span><span class="mi">2</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mf">558.5602</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'y</span><span class="mi">2</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mf">598.0585</span>
|
||||||
|
<span class="w"> </span><span class="p">},</span>
|
||||||
|
<span class="w"> </span><span class="err">'uuid'</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="mi">2</span><span class="err">b</span><span class="mi">10</span><span class="err">c</span><span class="mi">1</span><span class="err">a</span><span class="mi">2-393</span><span class="err">c</span><span class="mi">-4</span><span class="kc">f</span><span class="err">ca</span><span class="mi">-</span><span class="err">b</span><span class="mf">9e3-0</span><span class="err">ad</span><span class="mi">5</span><span class="err">b</span><span class="mi">774</span><span class="err">ac</span><span class="mi">84</span><span class="err">'</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'label'</span><span class="p">:</span><span class="w"> </span><span class="err">'</span><span class="kc">ta</span><span class="err">ble'</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'</span><span class="kc">ta</span><span class="err">bleLi</span><span class="kc">nes</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||||||
|
<span class="w"> </span><span class="p">{</span>
|
||||||
|
<span class="w"> </span><span class="err">'x</span><span class="mi">1</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'y</span><span class="mi">1</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mi">16</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'x</span><span class="mi">2</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mi">1399</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'y</span><span class="mi">2</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mi">16</span>
|
||||||
|
<span class="w"> </span><span class="p">},</span>
|
||||||
|
<span class="w"> </span><span class="err">...</span>
|
||||||
|
<span class="w"> </span><span class="p">],</span>
|
||||||
|
<span class="w"> </span><span class="err">'imageI</span><span class="kc">nf</span><span class="err">o'</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||||
|
<span class="w"> </span><span class="err">'heigh</span><span class="kc">t</span><span class="err">'</span><span class="p">:</span><span class="w"> </span><span class="mi">693</span><span class="p">,</span>
|
||||||
|
<span class="w"> </span><span class="err">'wid</span><span class="kc">t</span><span class="err">h'</span><span class="p">:</span><span class="w"> </span><span class="mi">1414</span>
|
||||||
|
<span class="w"> </span><span class="p">}</span>
|
||||||
|
<span class="w"> </span><span class="p">},</span>
|
||||||
|
<span class="w"> </span><span class="err">...</span>
|
||||||
|
<span class="w"> </span><span class="p">]</span>
|
||||||
|
<span class="p">}</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
<section id="installation">
|
||||||
|
<h2>Installation<a class="headerlink" href="#installation" title="Link to this heading">#</a></h2>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>git<span class="w"> </span>clone<span class="w"> </span>ssh://git@git.iqser.com:2222/rr/cv-analysis.git
|
||||||
|
<span class="nb">cd</span><span class="w"> </span>cv-analysis
|
||||||
|
|
||||||
|
python<span class="w"> </span>-m<span class="w"> </span>venv<span class="w"> </span>env
|
||||||
|
<span class="nb">source</span><span class="w"> </span>env/bin/activate
|
||||||
|
|
||||||
|
pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span>.
|
||||||
|
pip<span class="w"> </span>install<span class="w"> </span>-r<span class="w"> </span>requirements.txt
|
||||||
|
|
||||||
|
dvc<span class="w"> </span>pull
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
<section id="usage">
|
||||||
|
<h2>Usage<a class="headerlink" href="#usage" title="Link to this heading">#</a></h2>
|
||||||
|
<section id="as-an-api">
|
||||||
|
<h3>As an API<a class="headerlink" href="#as-an-api" title="Link to this heading">#</a></h3>
|
||||||
|
<p>The module provides functions for the individual tasks, all of which return some kind of collection of points, depending on
|
||||||
|
the specific task.</p>
|
||||||
|
<section id="redaction-detection-api">
|
||||||
|
<h4>Redaction Detection (API)<a class="headerlink" href="#redaction-detection-api" title="Link to this heading">#</a></h4>
|
||||||
|
<p>The below snippet shows how to find the outlines of previous redactions.</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">cv_analysis.redaction_detection</span> <span class="kn">import</span> <span class="n">find_redactions</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">pdf2image</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||||
|
|
||||||
|
<span class="n">pdf_path</span> <span class="o">=</span> <span class="o">...</span>
|
||||||
|
<span class="n">page_index</span> <span class="o">=</span> <span class="o">...</span>
|
||||||
|
|
||||||
|
<span class="n">page</span> <span class="o">=</span> <span class="n">pdf2image</span><span class="o">.</span><span class="n">convert_from_path</span><span class="p">(</span><span class="n">pdf_path</span><span class="p">,</span> <span class="n">first_page</span><span class="o">=</span><span class="n">page_index</span><span class="p">,</span> <span class="n">last_page</span><span class="o">=</span><span class="n">page_index</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||||
|
<span class="n">page</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">page</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">redaction_contours</span> <span class="o">=</span> <span class="n">find_redactions</span><span class="p">(</span><span class="n">page</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
<section id="as-a-cli-tool">
|
||||||
|
<h2>As a CLI Tool<a class="headerlink" href="#as-a-cli-tool" title="Link to this heading">#</a></h2>
|
||||||
|
<p>Core API functionalities can be used through a CLI.</p>
|
||||||
|
<section id="table-parsing">
|
||||||
|
<h3>Table Parsing<a class="headerlink" href="#table-parsing" title="Link to this heading">#</a></h3>
|
||||||
|
<p>The table parsing utility detects and segments tables into individual cells.</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">7</span><span class="w"> </span>--type<span class="w"> </span>table
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The below image shows a parsed table, where each table cell has been detected individually.</p>
|
||||||
|
<p><img alt="Table Parsing Demonstration" src="_images/table_parsing.png" /></p>
|
||||||
|
</section>
|
||||||
|
<section id="redaction-detection-cli">
|
||||||
|
<h3>Redaction Detection (CLI)<a class="headerlink" href="#redaction-detection-cli" title="Link to this heading">#</a></h3>
|
||||||
|
<p>The redaction detection utility detects previous redactions in PDFs (filled black rectangles).</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">2</span><span class="w"> </span>--type<span class="w"> </span>redaction
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The below image shows the detected redactions with green outlines.</p>
|
||||||
|
<p><img alt="Redaction Detection Demonstration" src="_images/redaction_detection.png" /></p>
|
||||||
|
</section>
|
||||||
|
<section id="layout-parsing">
|
||||||
|
<h3>Layout Parsing<a class="headerlink" href="#layout-parsing" title="Link to this heading">#</a></h3>
|
||||||
|
<p>The layout parsing utility detects elements such as paragraphs, tables and figures.</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">7</span><span class="w"> </span>--type<span class="w"> </span>layout
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The below image shows the detected layout elements on a page.</p>
|
||||||
|
<p><img alt="Layout Parsing Demonstration" src="_images/layout_parsing.png" /></p>
|
||||||
|
</section>
|
||||||
|
<section id="figure-detection">
|
||||||
|
<h3>Figure Detection<a class="headerlink" href="#figure-detection" title="Link to this heading">#</a></h3>
|
||||||
|
<p>The figure detection utility detects figures specifically, which can be missed by the generic layout parsing utility.</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/annotate.py<span class="w"> </span>data/test_pdf.pdf<span class="w"> </span><span class="m">3</span><span class="w"> </span>--type<span class="w"> </span>figure
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The below image shows the detected figure on a page.</p>
|
||||||
|
<p><img alt="Figure Detection Demonstration" src="_images/figure_detection.png" /></p>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
<section id="running-as-a-service">
|
||||||
|
<h2>Running as a service<a class="headerlink" href="#running-as-a-service" title="Link to this heading">#</a></h2>
|
||||||
|
<section id="building">
|
||||||
|
<h3>Building<a class="headerlink" href="#building" title="Link to this heading">#</a></h3>
|
||||||
|
<p>Build base image</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>bash<span class="w"> </span>setup/docker.sh
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Build head image</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>Dockerfile<span class="w"> </span>-t<span class="w"> </span>cv-analysis<span class="w"> </span>.<span class="w"> </span>--build-arg<span class="w"> </span><span class="nv">BASE_ROOT</span><span class="o">=</span><span class="s2">""</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
<section id="usage-service">
|
||||||
|
<h3>Usage (service)<a class="headerlink" href="#usage-service" title="Link to this heading">#</a></h3>
|
||||||
|
<p>Shell 1</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>run<span class="w"> </span>--rm<span class="w"> </span>--net<span class="o">=</span>host<span class="w"> </span>--rm<span class="w"> </span>cv-analysis
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Shell 2</p>
|
||||||
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>scripts/client_mock.py<span class="w"> </span>--pdf_path<span class="w"> </span>/path/to/a/pdf
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
|
</article>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<footer class="prev-next-footer">
|
||||||
|
|
||||||
|
<div class="prev-next-area">
|
||||||
|
<a class="left-prev"
|
||||||
|
href="index.html"
|
||||||
|
title="previous page">
|
||||||
|
<i class="fa-solid fa-angle-left"></i>
|
||||||
|
<div class="prev-next-info">
|
||||||
|
<p class="prev-next-subtitle">previous</p>
|
||||||
|
<p class="prev-next-title">Welcome to CV Analysis Service documentation!</p>
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
<a class="right-next"
|
||||||
|
href="modules/cv_analysis.html"
|
||||||
|
title="next page">
|
||||||
|
<div class="prev-next-info">
|
||||||
|
<p class="prev-next-subtitle">next</p>
|
||||||
|
<p class="prev-next-title">cv_analysis package</p>
|
||||||
|
</div>
|
||||||
|
<i class="fa-solid fa-angle-right"></i>
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||||||
|
|
||||||
|
|
||||||
|
<div class="sidebar-secondary-item">
|
||||||
|
<div
|
||||||
|
id="pst-page-navigation-heading-2"
|
||||||
|
class="page-toc tocsection onthispage">
|
||||||
|
<i class="fa-solid fa-list"></i> On this page
|
||||||
|
</div>
|
||||||
|
<nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
|
||||||
|
<ul class="visible nav section-nav flex-column">
|
||||||
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#api">API</a></li>
|
||||||
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#installation">Installation</a></li>
|
||||||
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#usage">Usage</a><ul class="nav section-nav flex-column">
|
||||||
|
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#as-an-api">As an API</a><ul class="nav section-nav flex-column">
|
||||||
|
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#redaction-detection-api">Redaction Detection (API)</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#as-a-cli-tool">As a CLI Tool</a><ul class="nav section-nav flex-column">
|
||||||
|
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#table-parsing">Table Parsing</a></li>
|
||||||
|
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#redaction-detection-cli">Redaction Detection (CLI)</a></li>
|
||||||
|
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#layout-parsing">Layout Parsing</a></li>
|
||||||
|
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#figure-detection">Figure Detection</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#running-as-a-service">Running as a service</a><ul class="nav section-nav flex-column">
|
||||||
|
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#building">Building</a></li>
|
||||||
|
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#usage-service">Usage (service)</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</nav></div>
|
||||||
|
|
||||||
|
<div class="sidebar-secondary-item">
|
||||||
|
|
||||||
|
<div class="tocsection sourcelink">
|
||||||
|
<a href="_sources/README.md.txt">
|
||||||
|
<i class="fa-solid fa-file-lines"></i> Show Source
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div></div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<footer class="bd-footer-content">
|
||||||
|
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
</main>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||||||
|
<script src="_static/scripts/bootstrap.js?digest=8d27b9dea8ad943066ae"></script>
|
||||||
|
<script src="_static/scripts/pydata-sphinx-theme.js?digest=8d27b9dea8ad943066ae"></script>
|
||||||
|
|
||||||
|
<footer class="bd-footer">
|
||||||
|
<div class="bd-footer__inner bd-page-width">
|
||||||
|
|
||||||
|
<div class="footer-items__start">
|
||||||
|
|
||||||
|
<div class="footer-item">
|
||||||
|
|
||||||
|
<p class="copyright">
|
||||||
|
|
||||||
|
© Copyright All rights reserved.
|
||||||
|
<br/>
|
||||||
|
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="footer-item">
|
||||||
|
|
||||||
|
<p class="sphinx-version">
|
||||||
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.3.7.
|
||||||
|
<br/>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div class="footer-items__end">
|
||||||
|
|
||||||
|
<div class="footer-item">
|
||||||
|
<p class="theme-version">
|
||||||
|
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.15.2.
|
||||||
|
</p></div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</footer>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
BIN
docs/build/html/_images/figure_detection.png
vendored
Normal file
BIN
docs/build/html/_images/figure_detection.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 707 KiB |
BIN
docs/build/html/_images/layout_parsing.png
vendored
Normal file
BIN
docs/build/html/_images/layout_parsing.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 568 KiB |
BIN
docs/build/html/_images/redaction_detection.png
vendored
Normal file
BIN
docs/build/html/_images/redaction_detection.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.2 MiB |
BIN
docs/build/html/_images/table_parsing.png
vendored
Normal file
BIN
docs/build/html/_images/table_parsing.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 566 KiB |
178
docs/build/html/_sources/README.md.txt
vendored
Normal file
178
docs/build/html/_sources/README.md.txt
vendored
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
# cv-analysis - Visual (CV-Based) Document Parsing
|
||||||
|
|
||||||
|
parse_pdf()
|
||||||
|
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
|
||||||
|
previous redactions in documents.
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
Input message:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"targetFilePath": {
|
||||||
|
"pdf": "absolute file path",
|
||||||
|
"vlp_output": "absolute file path"
|
||||||
|
},
|
||||||
|
"responseFilePath": "absolute file path",
|
||||||
|
"operation": "table_image_inference"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Response is uploaded to the storage as specified in the `responseFilePath` field. The structure is as follows:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
...,
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
'pageNum': 0,
|
||||||
|
'bbox': {
|
||||||
|
'x1': 55.3407,
|
||||||
|
'y1': 247.0246,
|
||||||
|
'x2': 558.5602,
|
||||||
|
'y2': 598.0585
|
||||||
|
},
|
||||||
|
'uuid': '2b10c1a2-393c-4fca-b9e3-0ad5b774ac84',
|
||||||
|
'label': 'table',
|
||||||
|
'tableLines': [
|
||||||
|
{
|
||||||
|
'x1': 0,
|
||||||
|
'y1': 16,
|
||||||
|
'x2': 1399,
|
||||||
|
'y2': 16
|
||||||
|
},
|
||||||
|
...
|
||||||
|
],
|
||||||
|
'imageInfo': {
|
||||||
|
'height': 693,
|
||||||
|
'width': 1414
|
||||||
|
}
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git
|
||||||
|
cd cv-analysis
|
||||||
|
|
||||||
|
python -m venv env
|
||||||
|
source env/bin/activate
|
||||||
|
|
||||||
|
pip install -e .
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
dvc pull
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### As an API
|
||||||
|
|
||||||
|
The module provides functions for the individual tasks, all of which return some kind of collection of points, depending on
|
||||||
|
the specific task.
|
||||||
|
|
||||||
|
#### Redaction Detection (API)
|
||||||
|
|
||||||
|
The snippet below shows how to find the outlines of previous redactions.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from cv_analysis.redaction_detection import find_redactions
|
||||||
|
import pdf2image
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
pdf_path = ...
|
||||||
|
page_index = ...
|
||||||
|
|
||||||
|
page = pdf2image.convert_from_path(pdf_path, first_page=page_index, last_page=page_index)[0]
|
||||||
|
page = np.array(page)
|
||||||
|
|
||||||
|
redaction_contours = find_redactions(page)
|
||||||
|
```
|
||||||
|
|
||||||
|
## As a CLI Tool
|
||||||
|
|
||||||
|
Core API functionalities can be used through a CLI.
|
||||||
|
|
||||||
|
### Table Parsing
|
||||||
|
|
||||||
|
The table parsing utility detects tables and segments them into individual cells.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/annotate.py data/test_pdf.pdf 7 --type table
|
||||||
|
```
|
||||||
|
|
||||||
|
The below image shows a parsed table, where each table cell has been detected individually.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Redaction Detection (CLI)
|
||||||
|
|
||||||
|
The redaction detection utility detects previous redactions in PDFs (filled black rectangles).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/annotate.py data/test_pdf.pdf 2 --type redaction
|
||||||
|
```
|
||||||
|
|
||||||
|
The below image shows the detected redactions with green outlines.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Layout Parsing
|
||||||
|
|
||||||
|
The layout parsing utility detects elements such as paragraphs, tables and figures.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/annotate.py data/test_pdf.pdf 7 --type layout
|
||||||
|
```
|
||||||
|
|
||||||
|
The below image shows the detected layout elements on a page.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Figure Detection
|
||||||
|
|
||||||
|
The figure detection utility detects figures specifically, which can be missed by the generic layout parsing utility.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/annotate.py data/test_pdf.pdf 3 --type figure
|
||||||
|
```
|
||||||
|
|
||||||
|
The below image shows the detected figure on a page.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## Running as a service
|
||||||
|
|
||||||
|
### Building
|
||||||
|
|
||||||
|
Build base image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash setup/docker.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Build head image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT=""
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage (service)
|
||||||
|
|
||||||
|
Shell 1
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm --net=host cv-analysis
|
||||||
|
```
|
||||||
|
|
||||||
|
Shell 2
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/client_mock.py --pdf_path /path/to/a/pdf
|
||||||
|
```
|
||||||
37
docs/build/html/_sources/index.rst.txt
vendored
Normal file
37
docs/build/html/_sources/index.rst.txt
vendored
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
.. CV Analysis Service documentation master file, created by
|
||||||
|
sphinx-quickstart on Mon Sep 12 12:04:24 2022.
|
||||||
|
You can adapt this file completely to your liking, but it should at least
|
||||||
|
contain the root `toctree` directive.
|
||||||
|
|
||||||
|
=============================================
|
||||||
|
Welcome to CV Analysis Service documentation!
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
If you'd like to change the looks of things 👉 https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
|
||||||
|
|
||||||
|
|
||||||
|
Table of Contents
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 3
|
||||||
|
:caption: README
|
||||||
|
|
||||||
|
README.md
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 3
|
||||||
|
:caption: Modules
|
||||||
|
|
||||||
|
modules/cv_analysis
|
||||||
|
modules/serve
|
||||||
|
|
||||||
|
|
||||||
|
Indices and tables
|
||||||
|
==================
|
||||||
|
|
||||||
|
* :ref:`genindex`
|
||||||
|
* :ref:`modindex`
|
||||||
|
* :ref:`search`
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.config.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.config.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.config module
|
||||||
|
==========================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.config
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.figure_detection.figure_detection.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.figure_detection.figure_detection.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.figure\_detection.figure\_detection module
|
||||||
|
=======================================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.figure_detection.figure_detection
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.figure_detection.figures.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.figure_detection.figures.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.figure\_detection.figures module
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.figure_detection.figures
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
17
docs/build/html/_sources/modules/cv_analysis.figure_detection.rst.txt
vendored
Normal file
17
docs/build/html/_sources/modules/cv_analysis.figure_detection.rst.txt
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
cv\_analysis.figure\_detection package
|
||||||
|
======================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.figure_detection
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
Submodules
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 4
|
||||||
|
|
||||||
|
cv_analysis.figure_detection.figure_detection
|
||||||
|
cv_analysis.figure_detection.figures
|
||||||
|
cv_analysis.figure_detection.text
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.figure_detection.text.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.figure_detection.text.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.figure\_detection.text module
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.figure_detection.text
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.layout_parsing.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.layout_parsing.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.layout\_parsing module
|
||||||
|
===================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.layout_parsing
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.locations.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.locations.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.locations module
|
||||||
|
=============================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.locations
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.redaction_detection.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.redaction_detection.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.redaction\_detection module
|
||||||
|
========================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.redaction_detection
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
30
docs/build/html/_sources/modules/cv_analysis.rst.txt
vendored
Normal file
30
docs/build/html/_sources/modules/cv_analysis.rst.txt
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
cv\_analysis package
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
Subpackages
|
||||||
|
-----------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 4
|
||||||
|
|
||||||
|
cv_analysis.figure_detection
|
||||||
|
cv_analysis.server
|
||||||
|
cv_analysis.utils
|
||||||
|
|
||||||
|
Submodules
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 4
|
||||||
|
|
||||||
|
cv_analysis.config
|
||||||
|
cv_analysis.layout_parsing
|
||||||
|
cv_analysis.locations
|
||||||
|
cv_analysis.redaction_detection
|
||||||
|
cv_analysis.table_inference
|
||||||
|
cv_analysis.table_parsing
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.server.pipeline.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.server.pipeline.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.server.pipeline module
|
||||||
|
===================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.server.pipeline
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
15
docs/build/html/_sources/modules/cv_analysis.server.rst.txt
vendored
Normal file
15
docs/build/html/_sources/modules/cv_analysis.server.rst.txt
vendored
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
cv\_analysis.server package
|
||||||
|
===========================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.server
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
Submodules
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 4
|
||||||
|
|
||||||
|
cv_analysis.server.pipeline
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.table_inference.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.table_inference.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.table\_inference module
|
||||||
|
====================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.table_inference
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.table_parsing.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.table_parsing.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.table\_parsing module
|
||||||
|
==================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.table_parsing
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.annotate.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.annotate.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.annotate module
|
||||||
|
==================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.annotate
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.banner.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.banner.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.banner module
|
||||||
|
================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.banner
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.connect_rects.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.connect_rects.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.connect\_rects module
|
||||||
|
========================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.connect_rects
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.display.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.display.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.display module
|
||||||
|
=================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.display
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.draw.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.draw.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.draw module
|
||||||
|
==============================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.draw
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.filters.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.filters.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.filters module
|
||||||
|
=================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.filters
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.image_extraction.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.image_extraction.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.image\_extraction module
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.image_extraction
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.open_pdf.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.open_pdf.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.open\_pdf module
|
||||||
|
===================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.open_pdf
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.postprocessing.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.postprocessing.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.postprocessing module
|
||||||
|
========================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.postprocessing
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.preprocessing.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.preprocessing.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.preprocessing module
|
||||||
|
=======================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.preprocessing
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
28
docs/build/html/_sources/modules/cv_analysis.utils.rst.txt
vendored
Normal file
28
docs/build/html/_sources/modules/cv_analysis.utils.rst.txt
vendored
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
cv\_analysis.utils package
|
||||||
|
==========================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
Submodules
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 4
|
||||||
|
|
||||||
|
cv_analysis.utils.annotate
|
||||||
|
cv_analysis.utils.banner
|
||||||
|
cv_analysis.utils.connect_rects
|
||||||
|
cv_analysis.utils.display
|
||||||
|
cv_analysis.utils.draw
|
||||||
|
cv_analysis.utils.filters
|
||||||
|
cv_analysis.utils.image_extraction
|
||||||
|
cv_analysis.utils.open_pdf
|
||||||
|
cv_analysis.utils.postprocessing
|
||||||
|
cv_analysis.utils.preprocessing
|
||||||
|
cv_analysis.utils.structures
|
||||||
|
cv_analysis.utils.test_metrics
|
||||||
|
cv_analysis.utils.utils
|
||||||
|
cv_analysis.utils.visual_logging
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.structures.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.structures.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.structures module
|
||||||
|
====================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.structures
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.test_metrics.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.test_metrics.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.test\_metrics module
|
||||||
|
=======================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.test_metrics
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.utils.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.utils.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.utils module
|
||||||
|
===============================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.utils
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/cv_analysis.utils.visual_logging.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/cv_analysis.utils.visual_logging.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cv\_analysis.utils.visual\_logging module
|
||||||
|
=========================================
|
||||||
|
|
||||||
|
.. automodule:: cv_analysis.utils.visual_logging
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
7
docs/build/html/_sources/modules/serve.rst.txt
vendored
Normal file
7
docs/build/html/_sources/modules/serve.rst.txt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
serve module
|
||||||
|
============
|
||||||
|
|
||||||
|
.. automodule:: serve
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
925
docs/build/html/_static/basic.css
vendored
Normal file
925
docs/build/html/_static/basic.css
vendored
Normal file
@ -0,0 +1,925 @@
|
|||||||
|
/*
|
||||||
|
* basic.css
|
||||||
|
* ~~~~~~~~~
|
||||||
|
*
|
||||||
|
* Sphinx stylesheet -- basic theme.
|
||||||
|
*
|
||||||
|
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
|
||||||
|
* :license: BSD, see LICENSE for details.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* -- main layout ----------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.clearer {
|
||||||
|
clear: both;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.section::after {
|
||||||
|
display: block;
|
||||||
|
content: '';
|
||||||
|
clear: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- relbar ---------------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.related {
|
||||||
|
width: 100%;
|
||||||
|
font-size: 90%;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related h3 {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related ul {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0 0 0 10px;
|
||||||
|
list-style: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related li {
|
||||||
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related li.right {
|
||||||
|
float: right;
|
||||||
|
margin-right: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- sidebar --------------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.sphinxsidebarwrapper {
|
||||||
|
padding: 10px 5px 0 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar {
|
||||||
|
float: left;
|
||||||
|
width: 270px;
|
||||||
|
margin-left: -100%;
|
||||||
|
font-size: 90%;
|
||||||
|
word-wrap: break-word;
|
||||||
|
overflow-wrap : break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar ul {
|
||||||
|
list-style: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar ul ul,
|
||||||
|
div.sphinxsidebar ul.want-points {
|
||||||
|
margin-left: 20px;
|
||||||
|
list-style: square;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar ul ul {
|
||||||
|
margin-top: 0;
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar form {
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar input {
|
||||||
|
border: 1px solid #98dbcc;
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar #searchbox form.search {
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar #searchbox input[type="text"] {
|
||||||
|
float: left;
|
||||||
|
width: 80%;
|
||||||
|
padding: 0.25em;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar #searchbox input[type="submit"] {
|
||||||
|
float: left;
|
||||||
|
width: 20%;
|
||||||
|
border-left: none;
|
||||||
|
padding: 0.25em;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
img {
|
||||||
|
border: 0;
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- search page ----------------------------------------------------------- */
|
||||||
|
|
||||||
|
ul.search {
|
||||||
|
margin: 10px 0 0 20px;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.search li {
|
||||||
|
padding: 5px 0 5px 20px;
|
||||||
|
background-image: url(file.png);
|
||||||
|
background-repeat: no-repeat;
|
||||||
|
background-position: 0 7px;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.search li a {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.search li p.context {
|
||||||
|
color: #888;
|
||||||
|
margin: 2px 0 0 30px;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.keywordmatches li.goodmatch a {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- index page ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
table.contentstable {
|
||||||
|
width: 90%;
|
||||||
|
margin-left: auto;
|
||||||
|
margin-right: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.contentstable p.biglink {
|
||||||
|
line-height: 150%;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.biglink {
|
||||||
|
font-size: 1.3em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span.linkdescr {
|
||||||
|
font-style: italic;
|
||||||
|
padding-top: 5px;
|
||||||
|
font-size: 90%;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- general index --------------------------------------------------------- */
|
||||||
|
|
||||||
|
table.indextable {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable td {
|
||||||
|
text-align: left;
|
||||||
|
vertical-align: top;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable ul {
|
||||||
|
margin-top: 0;
|
||||||
|
margin-bottom: 0;
|
||||||
|
list-style-type: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable > tbody > tr > td > ul {
|
||||||
|
padding-left: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable tr.pcap {
|
||||||
|
height: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable tr.cap {
|
||||||
|
margin-top: 10px;
|
||||||
|
background-color: #f2f2f2;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.toggler {
|
||||||
|
margin-right: 3px;
|
||||||
|
margin-top: 3px;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.modindex-jumpbox {
|
||||||
|
border-top: 1px solid #ddd;
|
||||||
|
border-bottom: 1px solid #ddd;
|
||||||
|
margin: 1em 0 1em 0;
|
||||||
|
padding: 0.4em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.genindex-jumpbox {
|
||||||
|
border-top: 1px solid #ddd;
|
||||||
|
border-bottom: 1px solid #ddd;
|
||||||
|
margin: 1em 0 1em 0;
|
||||||
|
padding: 0.4em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- domain module index --------------------------------------------------- */
|
||||||
|
|
||||||
|
table.modindextable td {
|
||||||
|
padding: 2px;
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- general body styles --------------------------------------------------- */

/* Constrain the body column to between 360px and 800px wide. */
div.body {
    max-width: 800px;
    min-width: 360px;
}

/* Automatic hyphenation for running text (prefixed forms first, standard last). */
div.body p,
div.body dd,
div.body li,
div.body blockquote {
    -moz-hyphens: auto;
    -ms-hyphens: auto;
    -webkit-hyphens: auto;
    hyphens: auto;
}

/* Header permalinks are hidden until one of the hover rules below applies. */
a.headerlink {
    visibility: hidden;
}

a:visited {
    color: #551a8b;
}

/* Reveal the permalink while its parent heading, term or caption is hovered. */
h1:hover > a.headerlink,
h2:hover > a.headerlink,
h3:hover > a.headerlink,
h4:hover > a.headerlink,
h5:hover > a.headerlink,
h6:hover > a.headerlink,
dt:hover > a.headerlink,
caption:hover > a.headerlink,
p.caption:hover > a.headerlink,
div.code-block-caption:hover > a.headerlink {
    visibility: visible;
}

/* Captions take their alignment from the enclosing element. */
div.body p.caption {
    text-align: inherit;
}

div.body td {
    text-align: left;
}

/* `!important` forces the top margin off regardless of other rules. */
.first {
    margin-top: 0 !important;
}

p.rubric {
    font-weight: bold;
    margin-top: 30px;
}
|
||||||
|
|
||||||
|
/* Left-aligned media: float left with a gutter on the right. */
img.align-left,
figure.align-left,
.figure.align-left,
object.align-left {
    float: left;
    clear: left;
    margin-right: 1em;
}

/* Right-aligned media: float right with a gutter on the left. */
img.align-right,
figure.align-right,
.figure.align-right,
object.align-right {
    float: right;
    clear: right;
    margin-left: 1em;
}

/* Centred and default-aligned media share the same declarations:
   a block box centred through auto side margins. */
img.align-center,
figure.align-center,
.figure.align-center,
object.align-center,
img.align-default,
figure.align-default,
.figure.align-default {
    display: block;
    margin-left: auto;
    margin-right: auto;
}

/* Text alignment helpers; `align-default` centres just like `align-center`. */
.align-left {
    text-align: left;
}

.align-center,
.align-default {
    text-align: center;
}

.align-right {
    text-align: right;
}
|
||||||
|
|
||||||
|
/* -- sidebars -------------------------------------------------------------- */

/* Sidebar box: floated right at 40% width, pale yellow with a thin border. */
div.sidebar,
aside.sidebar {
    float: right;
    clear: right;
    width: 40%;
    margin: 0 0 0.5em 1em;
    padding: 7px;
    border: 1px solid #ddddbb;
    background-color: #ffffee;
    overflow-x: auto;
}

p.sidebar-title {
    font-weight: bold;
}

/* These containers start below any preceding left float. */
nav.contents,
aside.topic,
div.admonition,
div.topic,
blockquote {
    clear: left;
}
|
||||||
|
|
||||||
|
/* -- topics ---------------------------------------------------------------- */

/* Topic boxes: thin grey border, padded, with vertical margin only. */
nav.contents,
aside.topic,
div.topic {
    margin: 10px 0;
    padding: 7px;
    border: 1px solid #cccccc;
}

p.topic-title {
    margin-top: 10px;
    font-size: 1.1em;
    font-weight: bold;
}
|
||||||
|
|
||||||
|
/* -- admonitions ----------------------------------------------------------- */

/* Only the vertical margins are set; horizontal margins are left alone. */
div.admonition {
    padding: 7px;
    margin-top: 10px;
    margin-bottom: 10px;
}

div.admonition dt {
    font-weight: bold;
}

p.admonition-title {
    font-weight: bold;
    margin: 0 10px 5px 0;
}

div.body p.centered {
    margin-top: 25px;
    text-align: center;
}
|
||||||
|
|
||||||
|
/* -- content of sidebars/topics/admonitions -------------------------------- */

/* Drop the bottom margin of whatever element ends each box. */
div.sidebar > :last-child,
aside.sidebar > :last-child,
nav.contents > :last-child,
aside.topic > :last-child,
div.topic > :last-child,
div.admonition > :last-child {
    margin-bottom: 0;
}

/* Clearfix: an empty block-level pseudo-element that clears floats on both
   sides, so each box extends past floated content inside it. */
div.sidebar::after,
aside.sidebar::after,
nav.contents::after,
aside.topic::after,
div.topic::after,
div.admonition::after,
blockquote::after {
    content: "";
    display: block;
    clear: both;
}
|
||||||
|
|
||||||
|
/* -- tables ---------------------------------------------------------------- */

/* Generic tables: no outer border, collapsed cell borders, vertical spacing. */
table.docutils {
    border: 0;
    border-collapse: collapse;
    margin-top: 10px;
    margin-bottom: 10px;
}

/* Centred and default-aligned tables are both centred via auto side margins. */
table.align-center,
table.align-default {
    margin-left: auto;
    margin-right: auto;
}

table caption span.caption-number {
    font-style: italic;
}

/* Intentionally empty rule set, kept so the selector stays in the sheet. */
table caption span.caption-text {
}

/* Cells show only a bottom rule; the other three sides are switched off. */
table.docutils td,
table.docutils th {
    padding: 1px 8px 1px 5px;
    border-top: 0;
    border-left: 0;
    border-right: 0;
    border-bottom: 1px solid #aaaaaa;
}

th {
    padding-right: 5px;
    text-align: left;
}

/* Citation tables carry a single grey rule down their left edge. */
table.citation {
    margin-left: 1px;
    border-left: 1px solid gray;
}

table.citation td {
    border-bottom: none;
}

/* Trim the outer margins of the first and last element inside a cell. */
th > :first-child,
td > :first-child {
    margin-top: 0;
}

th > :last-child,
td > :last-child {
    margin-bottom: 0;
}
|
||||||
|
|
||||||
|
/* -- figures --------------------------------------------------------------- */

div.figure,
figure {
    padding: 0.5em;
    margin: 0.5em;
}

div.figure p.caption,
figcaption {
    padding: 0.3em;
}

/* The caption number is set in italics. */
div.figure p.caption span.caption-number,
figcaption span.caption-number {
    font-style: italic;
}

/* Intentionally empty rule set, kept so the selectors stay in the sheet. */
div.figure p.caption span.caption-text,
figcaption span.caption-text {
}
|
||||||
|
|
||||||
|
/* -- field list styles ----------------------------------------------------- */

/* `!important` removes cell borders even where another rule sets them. */
table.field-list td,
table.field-list th {
    border: 0 !important;
}

.field-list ul {
    padding-left: 1em;
    margin: 0;
}

.field-list p {
    margin: 0;
}

/* Field names break only at explicit hyphenation opportunities. */
.field-name {
    -moz-hyphens: manual;
    -ms-hyphens: manual;
    -webkit-hyphens: manual;
    hyphens: manual;
}
|
||||||
|
|
||||||
|
/* -- hlist styles ---------------------------------------------------------- */

/* Vertical margin only; no horizontal margin. */
table.hlist {
    margin: 1em 0;
}

/* Cells are aligned to the top edge. */
table.hlist td {
    vertical-align: top;
}
|
||||||
|
|
||||||
|
/* -- object description styles --------------------------------------------- */

/* Signatures use a monospace font stack. */
.sig {
    font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
}

/* Object names: bold, with no background fill. */
.sig-name,
code.descname {
    font-weight: bold;
    background-color: transparent;
}

/* The two name selectors differ only in size. */
.sig-name {
    font-size: 1.1em;
}

code.descname {
    font-size: 1.2em;
}

.sig-prename,
code.descclassname {
    background-color: transparent;
}

.optional {
    font-size: 1.3em;
}

.sig-paren {
    font-size: larger;
}

.sig-param.n {
    font-style: italic;
}
|
||||||
|
|
||||||
|
/* C++ specific styling (the selectors below cover both `.c` and `.cpp`) */

/* Inline expressions revert to the font family they would otherwise get. */
.sig-inline.c-texpr,
.sig-inline.cpp-texpr {
    font-family: unset;
}

/* Token colours inside C / C++ signatures, one rule per group of
   token classes. */
.sig.c .k,
.sig.c .kt,
.sig.cpp .k,
.sig.cpp .kt {
    color: #0033b3;
}

.sig.c .m,
.sig.cpp .m {
    color: #1750eb;
}

.sig.c .s,
.sig.c .sc,
.sig.cpp .s,
.sig.cpp .sc {
    color: #067d17;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* -- other body styles ----------------------------------------------------- */

/* Each enumeration class maps to its list marker style. */
ol.arabic {
    list-style: decimal;
}

ol.loweralpha {
    list-style: lower-alpha;
}

ol.upperalpha {
    list-style: upper-alpha;
}

ol.lowerroman {
    list-style: lower-roman;
}

ol.upperroman {
    list-style: upper-roman;
}

/* For lists whose parent is not an `li`: remove the margin above the first
   child of the first item and below the last child of the last item. */
:not(li) > ol > li:first-child > :first-child,
:not(li) > ul > li:first-child > :first-child {
    margin-top: 0;
}

:not(li) > ol > li:last-child > :last-child,
:not(li) > ul > li:last-child > :last-child {
    margin-bottom: 0;
}

/* "simple" lists: paragraphs inside nested lists lose their top margin... */
ol.simple ol p,
ol.simple ul p,
ul.simple ol p,
ul.simple ul p {
    margin-top: 0;
}

/* ...as do paragraphs in every direct item except the first... */
ol.simple > li:not(:first-child) > p,
ul.simple > li:not(:first-child) > p {
    margin-top: 0;
}

/* ...and every paragraph loses its bottom margin. */
ol.simple p,
ul.simple p {
    margin-bottom: 0;
}
|
||||||
|
|
||||||
|
/* Direct `span` children of footnotes and citations float to the left. */
aside.footnote > span,
div.citation > span {
    float: left;
}

/* The last such span gets padding on its right. */
aside.footnote > span:last-of-type,
div.citation > span:last-of-type {
    padding-right: 0.5em;
}

/* Paragraphs are indented: 2em in footnotes, 4em in citations. */
aside.footnote > p {
    margin-left: 2em;
}

div.citation > p {
    margin-left: 4em;
}

/* The closing paragraph carries no bottom margin. */
aside.footnote > p:last-of-type,
div.citation > p:last-of-type {
    margin-bottom: 0;
}
|
||||||
|
/* Clearfix for the closing paragraph of a footnote or citation.
   `clear` only applies to block-level boxes, and a generated pseudo-element
   is inline by default, so `display: block` is required for the clear to
   take effect. This matches the clearfix used for sidebars, topics and
   admonitions elsewhere in this stylesheet. */
aside.footnote > p:last-of-type:after,
div.citation > p:last-of-type:after {
    content: "";
    display: block;
    clear: both;
}
|
||||||
|
|
||||||
|
/* Field lists use a two-column grid: the first column fits its content up
   to 30%, the second is sized automatically. */
dl.field-list {
    display: grid;
    grid-template-columns: fit-content(30%) auto;
}
|
||||||
|
|
||||||
|
/* Field-name cell: bold, padded, and allowed to break inside long words.
   `word-break: break-word` is a deprecated legacy value; it is kept as a
   fallback, and the standard `overflow-wrap: anywhere` (which the legacy
   value is specified to behave like) is declared alongside it. */
dl.field-list > dt {
    font-weight: bold;
    word-break: break-word;
    overflow-wrap: anywhere;
    padding-left: 0.5em;
    padding-right: 5px;
}
|
||||||
|
|
||||||
|
/* Field-list descriptions: left padding, with top, left and bottom margins
   set to zero. */
dl.field-list > dd {
    padding-left: 0.5em;
    margin-top: 0;
    margin-left: 0;
    margin-bottom: 0;
}

dl {
    margin-bottom: 15px;
}

dd > :first-child {
    margin-top: 0;
}

dd ul,
dd table {
    margin-bottom: 10px;
}

/* Default description spacing: indented 30px. */
dd {
    margin-top: 3px;
    margin-bottom: 10px;
    margin-left: 30px;
}

/* Inside signatures, definition lists and descriptions have no vertical margin. */
.sig dd {
    margin-top: 0;
    margin-bottom: 0;
}

.sig dl {
    margin-top: 0;
    margin-bottom: 0;
}

/* A trailing description, and its own last child, have no bottom margin. */
dl > dd:last-child,
dl > dd:last-child > :last-child {
    margin-bottom: 0;
}

/* Yellow highlight on the targeted term and on `span.highlighted`... */
dt:target,
span.highlighted {
    background-color: #fbe54e;
}

/* ...and the same colour as a fill on `rect.highlighted`. */
rect.highlighted {
    fill: #fbe54e;
}

dl.glossary dt {
    font-size: 1.1em;
    font-weight: bold;
}
|
||||||
|
|
||||||
|
.versionmodified {
    font-style: italic;
}

/* System messages: thick red border on an orange-tinted background. */
.system-message {
    padding: 5px;
    border: 3px solid red;
    background-color: #ffddaa;
}

/* The footnote that is the current link target is tinted yellow. */
.footnote:target {
    background-color: #ffffaa;
}

.line-block {
    display: block;
    margin-top: 1em;
    margin-bottom: 1em;
}

/* Nested line blocks are indented and carry no vertical margin. */
.line-block .line-block {
    margin-top: 0;
    margin-bottom: 0;
    margin-left: 1.5em;
}

.guilabel,
.menuselection {
    font-family: sans-serif;
}

.accelerator {
    text-decoration: underline;
}

.classifier {
    font-style: oblique;
}

/* A ":" in normal style is generated before each classifier. */
.classifier:before {
    content: ":";
    display: inline-block;
    margin: 0 0.5em;
    font-style: normal;
}

/* Abbreviations: dotted underline and the help cursor. */
abbr,
acronym {
    border-bottom: 1px dotted;
    cursor: help;
}
|
||||||
|
|
||||||
|
/* Translation markers: a faint green tint on `.translated` and a faint red
   tint on `.untranslated` (alpha 0.2). Each declaration is now terminated
   with a semicolon so that a declaration appended later cannot merge into
   the colour value. */
.translated {
    background-color: rgba(207, 255, 207, 0.2);
}

.untranslated {
    background-color: rgba(255, 207, 207, 0.2);
}
|
||||||
|
|
||||||
|
/* -- code displays --------------------------------------------------------- */

/* `overflow-y` must follow the `overflow` shorthand so it overrides only the
   vertical axis. */
pre {
    overflow: auto;
    overflow-y: hidden;  /* fixes display issues on Chrome browsers */
}

/* Code blocks start below any floats on either side. */
pre,
div[class*="highlight-"] {
    clear: both;
}

/* Inline literals are not hyphenated and do not wrap. */
span.pre {
    -moz-hyphens: none;
    -ms-hyphens: none;
    -webkit-hyphens: none;
    hyphens: none;
    white-space: nowrap;
}

div[class*="highlight-"] {
    margin: 1em 0;
}

/* Line-number `pre`: no border or background, grey text. */
td.linenos pre {
    color: #aaaaaa;
    background-color: transparent;
    border: 0;
}
|
||||||
|
|
||||||
|
/* The highlight table is laid out without table display types: the table
   and its body become blocks and each row becomes a flex container. */
table.highlighttable {
    display: block;
}

table.highlighttable tbody {
    display: block;
}

table.highlighttable tr {
    display: flex;
}

table.highlighttable td {
    padding: 0;
    margin: 0;
}

/* Padding on the right of the line-number cell. */
table.highlighttable td.linenos {
    padding-right: 0.5em;
}

/* The code cell takes the remaining row width; its overflow is hidden. */
table.highlighttable td.code {
    overflow: hidden;
    flex: 1;
}

.highlight .hll {
    display: block;
}

div.highlight pre,
table.highlighttable pre {
    margin: 0;
}
|
||||||
|
|
||||||
|
/* The `div` directly after a caption has no top margin. */
div.code-block-caption + div {
    margin-top: 0;
}

div.code-block-caption {
    font-size: small;
    padding: 2px 5px;
    margin-top: 1em;
}

div.code-block-caption code {
    background-color: transparent;
}
|
||||||
|
|
||||||
|
/* Make line numbers and prompts unselectable.
   The declaration order is kept exactly as it was: the first
   `-webkit-user-select` is overridden by the second in any engine that
   accepts both.
   NOTE(review): the `text` fallback therefore looks dead in such engines;
   confirm which Safari versions it was meant for before removing it. */
table.highlighttable td.linenos,
span.linenos,
div.highlight span.gp {  /* gp: Generic.Prompt */
    user-select: none;
    -webkit-user-select: text;  /* Safari fallback only */
    -webkit-user-select: none;  /* Chrome/Safari */
    -moz-user-select: none;     /* Firefox */
    -ms-user-select: none;      /* IE10+ */
}
|
||||||
|
|
||||||
|
div.code-block-caption span.caption-number {
    font-style: italic;
    padding: 0.1em 0.3em;
}

/* Intentionally empty rule set, kept so the selector stays in the sheet. */
div.code-block-caption span.caption-text {
}

div.literal-block-wrapper {
    margin: 1em 0;
}

/* Cross-reference and linked code: bold, with no background fill. */
code.xref,
a code {
    font-weight: bold;
    background-color: transparent;
}

/* Code inside headings has no background fill. */
h1 code,
h2 code,
h3 code,
h4 code,
h5 code,
h6 code {
    background-color: transparent;
}

.viewcode-link {
    float: right;
}

.viewcode-back {
    font-family: sans-serif;
    float: right;
}

/* Negative margins paired with matching horizontal padding on the
   targeted block. */
div.viewcode-block:target {
    padding: 0 10px;
    margin: -1px -10px;
}
|
||||||
|
|
||||||
|
/* -- math display ---------------------------------------------------------- */

img.math {
    vertical-align: middle;
}

div.body div.math p {
    text-align: center;
}

/* Equation numbers float to the right. */
span.eqno {
    float: right;
}

/* The permalink inside an equation number is absolutely positioned with
   z-index 1. */
span.eqno a.headerlink {
    z-index: 1;
    position: absolute;
}

/* Hovering a math block makes header links inside it visible. */
div.math:hover a.headerlink {
    visibility: visible;
}
|
||||||
|
|
||||||
|
/* -- printout stylesheet --------------------------------------------------- */

@media print {
    /* The wrapper elements lose their margins and take the full width. */
    div.document,
    div.documentwrapper,
    div.bodywrapper {
        width: 100%;
        margin: 0 !important;
    }

    /* These elements are not displayed when printing. */
    div.sphinxsidebar,
    div.related,
    div.footer,
    #top-link {
        display: none;
    }
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user