Compare commits
3 Commits
| Author | SHA1 | Date |
|---|---|---|
| | e3f06da823 | |
| | c25c8d764e | |
| | dcab1e8616 | |
@@ -10,7 +10,7 @@ omit =
    */build_venv/*
    */incl/*
source =
    cv_analysis
    cv_analysis
relative_files = True
data_file = .coverage

@@ -46,4 +46,4 @@ ignore_errors = True
directory = reports

[xml]
output = reports/coverage.xml
output = reports/coverage.xml
@@ -97,4 +97,4 @@ target/
*.swp
*/*.swp
*/*/*.swp
*/*/*/*.swp
*/*/*/*.swp
@@ -1,10 +1,7 @@
[core]
    remote = azure_remote
    remote = vector
    autostage = true
['remote "vector"']
    url = ssh://vector.iqser.com/research/nonml_cv_doc_parsing/
    port = 22
['remote "azure_remote"']
    url = azure://cv-sa-dvc/
    connection_string = "DefaultEndpointsProtocol=https;AccountName=cvsacricket;AccountKey=KOuTAQ6Mp00ePTT5ObYmgaHlxwS1qukY4QU4Kuk7gy/vldneA+ZiKjaOpEFtqKA6Mtym2gQz8THy+ASts/Y1Bw==;EndpointSuffix=core.windows.net"
['remote "local"']
    url = ../dvc_local_remote
.gitignore (78 lines changed, vendored)
@@ -1,52 +1,28 @@
# Environments
.env
.venv
env/
venv/
.pytest*
.python-version
.DS_Store

# Project folders
scratch/
*.vscode/
.idea
*_app
*pytest_cache
*joblib
*tmp
*profiling
*logs
*docker
*drivers
*bamboo-specs/target

# Python specific files
__pycache__/
*.py[cod]
*.ipynb
*.ipynb_checkpoints

# file extensions
*.log
*.csv
*.json
*.pkl
*.profile
*.cbm

# temp files
*.swp
*~
*.un~

# keep files
!notebooks/*.ipynb

# keep folders
!secrets
!data/*
!drivers

# unignore files
!bom.*
*.egg-info/
deskew_model/
build_venv/
/pdfs/
/results/
/pdfs/
/env/
/.idea/
/.idea/.gitignore
/.idea/misc.xml
/.idea/inspectionProfiles/profiles_settings.xml
/.idea/table_parsing.iml
/.idea/vcs.xml
/results/
/table_parsing.egg-info
/target/
/tests/
/cv_analysis.egg-info/dependency_links.txt
/cv_analysis.egg-info/PKG-INFO
/cv_analysis.egg-info/SOURCES.txt
/cv_analysis.egg-info/top_level.txt
/.vscode/
/cv_analysis/test/test_data/example_pages.json
/data/metadata_testing_files.csv
.coverage
/data/
/venv/
@@ -1,30 +0,0 @@
include:
  - project: "Gitlab/gitlab"
    ref: 0.3.0
    file: "/ci-templates/research/dvc-versioning-build-release.gitlab-ci.yml"

variables:
  NEXUS_PROJECT_DIR: red
  IMAGENAME: "${CI_PROJECT_NAME}"

#################################
# temp. disable integration tests, b/c they don't cover the CV analysis case yet
trigger integration tests:
  rules:
    - when: never

release build:
  stage: release
  needs:
    - job: set custom version
      artifacts: true
      optional: true
    - job: calculate patch version
      artifacts: true
      optional: true
    - job: calculate minor version
      artifacts: true
      optional: true
    - job: build docker nexus
      artifacts: true
#################################
@@ -1,35 +0,0 @@
# CI for services, check gitlab repo for python package CI
include:
  - project: "Gitlab/gitlab"
    ref: main
    file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
  - project: "Gitlab/gitlab"
    ref: main
    file: "/ci-templates/research/docs.gitlab-ci.yml"

# set project variables here
variables:
  NEXUS_PROJECT_DIR: red # subfolder in Nexus docker-gin where your container will be stored
  IMAGENAME: $CI_PROJECT_NAME # if the project URL is gitlab.example.com/group-name/project-1, CI_PROJECT_NAME is project-1

pages:
  only:
    - master # KEEP THIS, necessary because `master` branch and not `main` branch

###################
# INTEGRATION TESTS
trigger-integration-tests:
  extends: .integration-tests
  # ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
  # needs:
  #   - job: docker-build::model_name
  #     artifacts: true
  rules:
    - when: never

#########
# RELEASE
release:
  extends: .release
  needs:
    - !reference [.needs-versioning, needs] # leave this line as is
@@ -1,61 +0,0 @@
import subprocess
import sys
from pathlib import Path

import semver
from loguru import logger
from semver.version import Version

logger.remove()
logger.add(sys.stdout, level="INFO")


def bashcmd(cmds: list) -> str:
    try:
        logger.debug(f"running: {' '.join(cmds)}")
        return subprocess.run(cmds, check=True, capture_output=True, text=True).stdout.strip("\n")
    except:
        logger.warning(f"Error executing the following bash command: {' '.join(cmds)}.")
        raise


def get_highest_existing_git_version_tag() -> str:
    """Get highest versions from git tags depending on bump level"""
    try:
        git_tags = bashcmd(["git", "tag", "-l"]).split()
        semver_compat_tags = list(filter(Version.is_valid, git_tags))
        highest_git_version_tag = max(semver_compat_tags, key=semver.version.Version.parse)
        logger.info(f"Highest git version tag: {highest_git_version_tag}")
        return highest_git_version_tag
    except:
        logger.warning("Error getting git version tags")
        raise


def auto_bump_version() -> bool:
    active = Path(".autoversion").is_file()
    logger.debug(f"Automated version bump is set to '{active}'")
    return active


def main() -> None:
    poetry_project_version = bashcmd(["poetry", "version", "-s"])

    logger.info(f"Poetry project version: {poetry_project_version}")

    highest_git_version_tag = get_highest_existing_git_version_tag()

    comparison_result = semver.compare(poetry_project_version, highest_git_version_tag)

    if comparison_result in (-1, 0):
        logger.warning("Poetry version must be greater than git tag version.")
        if auto_bump_version():
            logger.info(bashcmd(["poetry", "version", highest_git_version_tag]))
            sys.exit(0)
        sys.exit(1)
    else:
        logger.info(f"All good: {poetry_project_version} > {highest_git_version_tag}")


if __name__ == "__main__":
    main()
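For reference, a minimal sketch of the comparison this hook hinges on (hypothetical version strings; assumes only the Python `semver` package the hook already imports):

```python
import semver

# semver.compare returns -1, 0, or 1; the hook fails on -1 and 0:
print(semver.compare("1.2.3", "1.3.0"))  # -1: project version behind the git tag
print(semver.compare("1.3.0", "1.3.0"))  #  0: versions equal, still rejected
print(semver.compare("1.4.0", "1.3.0"))  #  1: project version ahead, hook passes
```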
@@ -1,72 +0,0 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
exclude: ^(docs/|notebooks/|data/|src/configs/|tests/|.hooks/|bom.json)
default_language_version:
  python: python3.10
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        args: [--unsafe] # needed for .gitlab-ci.yml
      - id: check-toml
      - id: detect-private-key
      - id: check-added-large-files
        args: ['--maxkb=10000']
      - id: check-case-conflict
      - id: mixed-line-ending

  # - repo: https://github.com/pre-commit/mirrors-pylint
  #   rev: v3.0.0a5
  #   hooks:
  #     - id: pylint
  #       args:
  #         - --disable=C0111,R0903,E0401
  #         - --max-line-length=120

  - repo: https://github.com/pre-commit/mirrors-isort
    rev: v5.10.1
    hooks:
      - id: isort
        args:
          - --profile black

  - repo: https://github.com/psf/black
    rev: 24.10.0
    hooks:
      - id: black
        # exclude: ^(docs/|notebooks/|data/|src/secrets/)
        args:
          - --line-length=120

  - repo: https://github.com/compilerla/conventional-pre-commit
    rev: v4.0.0
    hooks:
      - id: conventional-pre-commit
        pass_filenames: false
        stages: [commit-msg]
        # args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test]

  - repo: local
    hooks:
      - id: version-checker
        name: version-checker
        entry: python .hooks/poetry_version_check.py
        language: python
        always_run: true
        additional_dependencies:
          - "semver"
          - "loguru"

  # - repo: local
  #   hooks:
  #     - id: docker-build-test
  #       name: testing docker build
  #       entry: ./scripts/ops/docker-compose-build-run.sh
  #       language: script
  #       # always_run: true
  #       pass_filenames: false
  #       args: []
  #       stages: [pre-commit]
Dockerfile (84 lines changed)
@@ -1,78 +1,30 @@
###############
# BUILDER IMAGE
FROM python:3.10-slim as builder
FROM python:3.10

ARG GITLAB_USER
ARG GITLAB_ACCESS_TOKEN
RUN python -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"

ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
RUN python -m pip install --upgrade pip

ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
ARG POETRY_SOURCE_REF_RED=gitlab-red
WORKDIR /app/service

ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
COPY ./requirements.txt ./requirements.txt
RUN python3 -m pip install -r requirements.txt

ARG VERSION=dev
COPY ./incl/pyinfra/requirements.txt ./incl/pyinfra/requirements.txt
RUN python -m pip install -r incl/pyinfra/requirements.txt

LABEL maintainer="Research <research@knecon.com>"
LABEL version="${VERSION}"
COPY ./incl/pdf2image/requirements.txt ./incl/pdf2image/requirements.txt
RUN python -m pip install -r incl/pdf2image/requirements.txt

WORKDIR /app
COPY ./incl ./incl

###########
# ENV SETUP
ENV PYTHONDONTWRITEBYTECODE=true
ENV PYTHONUNBUFFERED=true
ENV POETRY_HOME=/opt/poetry
ENV PATH="$POETRY_HOME/bin:$PATH"
RUN python3 -m pip install -e incl/pyinfra
RUN python3 -m pip install -e incl/pdf2image

RUN apt-get update && \
    apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

RUN curl -sSL https://install.python-poetry.org | python3 -
RUN poetry --version

COPY pyproject.toml poetry.lock ./

RUN poetry config virtualenvs.create true && \
    poetry config virtualenvs.in-project true && \
    poetry config installer.max-workers 10 && \
    poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
    poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
    poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
    poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
    poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
    poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
    poetry install --without=dev,docs,test -vv --no-interaction --no-root

##################
# COPY SOURCE CODE
COPY ./config ./config
COPY ./src ./src
COPY ./cv_analysis ./cv_analysis
COPY ./setup.py ./setup.py

###############
# WORKING IMAGE
FROM python:3.10-slim
RUN python3 -m pip install -e .

# COPY BILL OF MATERIALS (BOM)
COPY bom.json /bom.json

# COPY SOURCE CODE FROM BUILDER IMAGE
COPY --from=builder /app /app

WORKDIR /app

ENV PATH="/app/.venv/bin:$PATH"

############
# NETWORKING
EXPOSE 5000
EXPOSE 8080

################
# LAUNCH COMMAND
CMD [ "python", "src/serve.py"]
CMD ["python3", "-u", "src/serve.py"]
Makefile (94 lines changed)
@@ -1,94 +0,0 @@
.PHONY: \
	poetry in-project-venv dev-env use-env install install-dev tests \
	update-version sync-version-with-git \
	docker docker-build-run docker-build docker-run \
	docker-rm docker-rm-container docker-rm-image \
	pre-commit get-licenses prep-commit \
	docs sphinx_html sphinx_apidoc bom
.DEFAULT_GOAL := run

export DOCKER=docker
export DOCKERFILE=Dockerfile
export IMAGE_NAME=cv_analysis_service-image
export CONTAINER_NAME=cv_analysis_service-container
export HOST_PORT=9999
export CONTAINER_PORT=9999
export PYTHON_VERSION=python3.10

# all commands should be executed in the root dir of the project,
# specific environments should be deactivated

poetry: in-project-venv use-env dev-env

in-project-venv:
	poetry config virtualenvs.in-project true

use-env:
	poetry env use ${PYTHON_VERSION}

dev-env:
	poetry install --with dev && poetry update

install:
	poetry add $(pkg)

install-dev:
	poetry add --dev $(pkg)

requirements:
	poetry export --without-hashes --output requirements.txt

update-version:
	poetry version prerelease

sync-version-with-git:
	git pull -p && poetry version $(git rev-list --tags --max-count=1 | git describe --tags --abbrev=0)

bom:
	cyclonedx-py poetry -o bom.json

docker: docker-rm docker-build-run

docker-build-run: docker-build docker-run

docker-build:
	$(DOCKER) build \
		--no-cache --progress=plain \
		-t $(IMAGE_NAME) -f $(DOCKERFILE) \
		--build-arg USERNAME=${USERNAME} \
		--build-arg TOKEN=${GITLAB_TOKEN} \
		.

docker-run:
	$(DOCKER) run -it --rm -p $(HOST_PORT):$(CONTAINER_PORT)/tcp --name $(CONTAINER_NAME) $(IMAGE_NAME)

docker-rm: docker-rm-container docker-rm-image

docker-rm-container:
	-$(DOCKER) rm $(CONTAINER_NAME)

docker-rm-image:
	-$(DOCKER) image rm $(IMAGE_NAME)

tests:
	poetry run pytest ./tests

prep-commit:
	docs get-license sync-version-with-git update-version pre-commit

pre-commit:
	pre-commit run --all-files

get-licenses:
	pip-licenses --format=json --order=license --with-urls > pkg-licenses.json

docs: sphinx_apidoc sphinx_html

sphinx_html:
	poetry run sphinx-build -b html docs/source/ docs/build/html -E -a

sphinx_apidoc:
	cp ./README.md ./docs/source/README.md && cp -r ./data ./docs/source/data/ && poetry run sphinx-apidoc ./src -o ./docs/source/modules --no-toc --module-first --follow-links --separate --force

bom:
	cyclonedx-py poetry -o bom.json
README.md (57 lines changed)
@@ -1,60 +1,8 @@
# cv-analysis - Visual (CV-Based) Document Parsing
# cv-analysis — Visual (CV-Based) Document Parsing

parse_pdf()
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.

## API

Input message:

```json
{
    "targetFilePath": {
        "pdf": "absolute file path",
        "vlp_output": "absolute file path"
    },
    "responseFilePath": "absolute file path",
    "operation": "table_image_inference"
}
```

Response is uploaded to the storage as specified in the responseFilePath field. The structure is as follows:

```json
{
    ...,
    "data": [
        {
            "pageNum": 0,
            "bbox": {
                "x1": 55.3407,
                "y1": 247.0246,
                "x2": 558.5602,
                "y2": 598.0585
            },
            "uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84",
            "label": "table",
            "tableLines": [
                {
                    "x1": 0,
                    "y1": 16,
                    "x2": 1399,
                    "y2": 16
                },
                ...
            ],
            "imageInfo": {
                "height": 693,
                "width": 1414
            }
        },
        ...
    ]
}
```

## Installation

```bash
```

@@ -83,9 +31,10 @@ The below snippet shows how to find the outlines of previous redactions.

```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import pdf2image
import numpy as np


pdf_path = ...
page_index = ...
```
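A minimal end-to-end sketch of the call above, with the elided values filled in by hypothetical placeholders (assumes pdf2image's `convert_from_path` and the 200 DPI resolution that `find_redactions`' area scaling expects):

```python
import numpy as np
import pdf2image

from cv_analysis.redaction_detection import find_redactions

pdf_path = "scanned_contract.pdf"  # hypothetical input file
page_index = 0

# Convert one page to a numpy array at 200 DPI, then look for filled,
# box-like contours that are large enough to be previous redactions.
pages = pdf2image.convert_from_path(pdf_path, dpi=200)
image = np.array(pages[page_index])
redactions = find_redactions(image)  # list of contours; [] if nothing matches
print(f"{len(redactions)} candidate redaction outlines found")
```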
bamboo-specs/pom.xml (40 lines, new file)
@@ -0,0 +1,40 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.atlassian.bamboo</groupId>
        <artifactId>bamboo-specs-parent</artifactId>
        <version>7.1.2</version>
        <relativePath/>
    </parent>

    <artifactId>bamboo-specs</artifactId>
    <version>1.0.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <sonar.skip>true</sonar.skip>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.atlassian.bamboo</groupId>
            <artifactId>bamboo-specs-api</artifactId>
        </dependency>
        <dependency>
            <groupId>com.atlassian.bamboo</groupId>
            <artifactId>bamboo-specs</artifactId>
        </dependency>

        <!-- Test dependencies -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <!-- run 'mvn test' to perform offline validation of the plan -->
    <!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
</project>
bamboo-specs/src/main/java/buildjob/PlanSpec.java (178 lines, new file)
@@ -0,0 +1,178 @@
package buildjob;

import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;

import java.time.LocalTime;

import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
import com.atlassian.bamboo.specs.api.builders.plan.Job;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
import com.atlassian.bamboo.specs.api.builders.project.Project;
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
import com.atlassian.bamboo.specs.builders.task.CleanWorkingDirectoryTask;
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
import com.atlassian.bamboo.specs.builders.trigger.ScheduledTrigger;
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
import com.atlassian.bamboo.specs.api.builders.Variable;
import com.atlassian.bamboo.specs.util.BambooServer;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;

/**
 * Plan configuration for Bamboo.
 * Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
 */
@BambooSpec
public class PlanSpec {

    private static final String SERVICE_NAME = "cv-analysis";

    private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-", "").replaceAll("_", "");

    /**
     * Run main to publish plan on Bamboo
     */
    public static void main(final String[] args) throws Exception {
        // By default credentials are read from the '.credentials' file.
        BambooServer bambooServer = new BambooServer("http://localhost:8085");

        Plan plan = new PlanSpec().createDockerBuildPlan();
        bambooServer.publish(plan);
        PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
        bambooServer.publish(planPermission);

        Plan secPlan = new PlanSpec().createSecBuild();
        bambooServer.publish(secPlan);
        PlanPermissions secPlanPermission = new PlanSpec().createPlanPermission(secPlan.getIdentifier());
        bambooServer.publish(secPlanPermission);
    }

    private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
        Permissions permission = new Permissions()
                .userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
                .groupPermissions("research", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
                .groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
                .groupPermissions("QA", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
                .loggedInUserPermissions(PermissionType.VIEW)
                .anonymousUserPermissionView();
        return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
    }

    private Project project() {
        return new Project()
                .name("RED")
                .key(new BambooKey("RED"));
    }

    public Plan createDockerBuildPlan() {
        return new Plan(
                project(),
                SERVICE_NAME, new BambooKey(SERVICE_KEY))
                // .description("Docker build for cv-analysis.")
                // .variables()
                .stages(new Stage("Build Stage")
                        .jobs(
                                new Job("Build Job", new BambooKey("BUILD"))
                                        .tasks(
                                                new CleanWorkingDirectoryTask()
                                                        .description("Clean working directory.")
                                                        .enabled(true),
                                                new VcsCheckoutTask()
                                                        .description("Checkout default repository.")
                                                        .checkoutItems(new CheckoutItem().defaultRepository()),
                                                new ScriptTask()
                                                        .description("Set config and keys.")
                                                        .location(Location.FILE)
                                                        .fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
                                                new ScriptTask()
                                                        .description("Build Docker container.")
                                                        .location(Location.FILE)
                                                        .fileFromPath("bamboo-specs/src/main/resources/scripts/docker-build.sh")
                                                        .argument(SERVICE_NAME),
                                                new InjectVariablesTask()
                                                        .description("Inject git tag.")
                                                        .path("git.tag")
                                                        .namespace("g")
                                                        .scope(InjectVariablesScope.LOCAL),
                                                new VcsTagTask()
                                                        .description("${bamboo.g.gitTag}")
                                                        .tagName("${bamboo.g.gitTag}")
                                                        .defaultRepository())
                                        .dockerConfiguration(
                                                new DockerConfiguration()
                                                        .image("nexus.iqser.com:5001/infra/release_build:4.5.0")
                                                        .volume("/var/run/docker.sock", "/var/run/docker.sock")),
                                new Job("Licence Job", new BambooKey("LICENCE"))
                                        .enabled(false)
                                        .tasks(
                                                new VcsCheckoutTask()
                                                        .description("Checkout default repository.")
                                                        .checkoutItems(new CheckoutItem().defaultRepository()),
                                                new ScriptTask()
                                                        .description("Build licence.")
                                                        .location(Location.FILE)
                                                        .fileFromPath("bamboo-specs/src/main/resources/scripts/create-licence.sh"))
                                        .dockerConfiguration(
                                                new DockerConfiguration()
                                                        .image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
                                                        .volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
                                                        .volume("/var/run/docker.sock", "/var/run/docker.sock"))))
                .linkedRepositories("RR / " + SERVICE_NAME)
                .triggers(
                        new BitbucketServerTrigger())
                .planBranchManagement(
                        new PlanBranchManagement()
                                .createForVcsBranch()
                                .delete(
                                        new BranchCleanup()
                                                .whenInactiveInRepositoryAfterDays(14))
                                .notificationForCommitters());
    }

    public Plan createSecBuild() {
        return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
                .stages(new Stage("Default Stage").jobs(
                        new Job("Sonar Job", new BambooKey("SONAR"))
                                .tasks(
                                        new CleanWorkingDirectoryTask()
                                                .description("Clean working directory.")
                                                .enabled(true),
                                        new VcsCheckoutTask()
                                                .description("Checkout default repository.")
                                                .checkoutItems(new CheckoutItem().defaultRepository()),
                                        new ScriptTask()
                                                .description("Set config and keys.")
                                                .location(Location.FILE)
                                                .fileFromPath("bamboo-specs/src/main/resources/scripts/key-prepare.sh"),
                                        new ScriptTask()
                                                .description("Run Sonarqube scan.")
                                                .location(Location.FILE)
                                                .fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-scan.sh")
                                                .argument(SERVICE_NAME))
                                .dockerConfiguration(
                                        new DockerConfiguration()
                                                .image("nexus.iqser.com:5001/infra/release_build:4.2.0")
                                                .volume("/var/run/docker.sock", "/var/run/docker.sock"))))
                .linkedRepositories("RR / " + SERVICE_NAME)
                .triggers(
                        new ScheduledTrigger()
                                .scheduleOnceDaily(LocalTime.of(23, 00)))
                .planBranchManagement(
                        new PlanBranchManagement()
                                .createForVcsBranchMatching("release.*")
                                .notificationForCommitters());
    }
}
bamboo-specs/src/main/resources/scripts/create-licence.sh (19 lines, new executable file)
@@ -0,0 +1,19 @@
#!/bin/bash
set -e

if [[ "${bamboo_version_tag}" != "dev" ]]
then
    ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
        -f ${bamboo_build_working_directory}/pom.xml \
        versions:set \
        -DnewVersion=${bamboo_version_tag}

    ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
        -f ${bamboo_build_working_directory}/pom.xml \
        -B clean deploy \
        -e -DdeployAtEnd=true \
        -Dmaven.wagon.http.ssl.insecure=true \
        -Dmaven.wagon.http.ssl.allowall=true \
        -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
        -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
fi
bamboo-specs/src/main/resources/scripts/docker-build.sh (53 lines, new executable file)
@@ -0,0 +1,53 @@
#!/bin/bash
set -e

SERVICE_NAME=$1

if [[ "$bamboo_planRepository_branchName" == "master" ]]
then
    branchVersion=$(cat version.yaml | grep -Eo "version: .*" | sed -s 's|version: \(.*\)\..*\..*|\1|g')
    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
    newVersion="$(semver $latestVersion -p -i minor)"
    echo "new release on master with version $newVersion"
elif [[ "$bamboo_planRepository_branchName" == release* ]]
then
    branchVersion=$(echo $bamboo_planRepository_branchName | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
    latestVersion=$( semver $(git tag -l "${branchVersion}.*" ) | tail -n1 )
    newVersion="$(semver $latestVersion -p -i patch)"
    echo "new release on $bamboo_planRepository_branchName with version $newVersion"
elif [[ "${bamboo_version_tag}" != "dev" ]]
then
    newVersion="${bamboo_version_tag}"
    echo "new special version build with $newVersion"
else
    newVersion="${bamboo_planRepository_1_branch}_${bamboo_buildNumber}"
    echo "gitTag=${newVersion}" > git.tag
    echo "dev build with tag ${newVersion}"
    python3 -m venv build_venv
    source build_venv/bin/activate
    python3 -m pip install --upgrade pip

    pip install dvc
    pip install 'dvc[ssh]'
    dvc pull

    echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
    echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
    docker build -f Dockerfile .
    exit 0
fi

echo "gitTag=${newVersion}" > git.tag

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

pip install dvc
pip install 'dvc[ssh]'
dvc pull

echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion} .
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${newVersion}
bamboo-specs/src/main/resources/scripts/key-prepare.sh (8 lines, new executable file)
@@ -0,0 +1,8 @@
#!/bin/bash
set -e

mkdir -p ~/.ssh
echo "${bamboo_agent_ssh}" | base64 -d >> ~/.ssh/id_rsa
echo "host vector.iqser.com" > ~/.ssh/config
echo "  user bamboo-agent" >> ~/.ssh/config
chmod 600 ~/.ssh/config ~/.ssh/id_rsa
bamboo-specs/src/main/resources/scripts/sonar-scan.sh (67 lines, new executable file)
@@ -0,0 +1,67 @@
#!/bin/bash
set -e

export JAVA_HOME=/usr/bin/sonar-scanner/jre

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

echo "dev setup for unit test and coverage"

pip install -e incl/pyinfra
pip install -r incl/pyinfra/requirements.txt

pip install -e incl/pdf2image
pip install -r incl/pdf2image/requirements.txt

pip install -e .
pip install -r requirements.txt


echo "DVC pull step"
dvc pull

echo "coverage calculation"
coverage run -m pytest
echo "coverage report generation"
coverage report -m
coverage xml

SERVICE_NAME=$1

echo "dependency-check:aggregate"
mkdir -p reports
dependency-check --enableExperimental -f JSON -f HTML -f XML \
    --disableAssembly -s . -o reports --project $SERVICE_NAME --exclude ".git/**" --exclude "venv/**" \
    --exclude "build_venv/**" --exclude "**/__pycache__/**"

if [[ -z "${bamboo_repository_pr_key}" ]]
then
    echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
    /usr/bin/sonar-scanner/bin/sonar-scanner -X \
        -Dsonar.projectKey=RED_$SERVICE_NAME \
        -Dsonar.sources=src,cv_analysis \
        -Dsonar.host.url=https://sonarqube.iqser.com \
        -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
        -Dsonar.branch.name=${bamboo_planRepository_1_branch} \
        -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
        -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
        -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
        -Dsonar.python.coverage.reportPaths=reports/coverage.xml

else
    echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
    /usr/bin/sonar-scanner/bin/sonar-scanner \
        -Dsonar.projectKey=RED_$SERVICE_NAME \
        -Dsonar.sources=src,cv_analysis \
        -Dsonar.host.url=https://sonarqube.iqser.com \
        -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
        -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
        -Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
        -Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
        -Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json \
        -Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml \
        -Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html \
        -Dsonar.python.coverage.reportPaths=reports/coverage.xml
fi
bamboo-specs/src/test/java/buildjob/PlanSpecTest.java (22 lines, new file)
@@ -0,0 +1,22 @@
package buildjob;


import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
import org.junit.Test;

public class PlanSpecTest {
    @Test
    public void checkYourPlanOffline() throws PropertiesValidationException {
        Plan plan = new PlanSpec().createDockerBuildPlan();

        EntityPropertiesBuilders.build(plan);
    }

    @Test
    public void checkYourSecPlanOffline() throws PropertiesValidationException {
        Plan secPlan = new PlanSpec().createSecBuild();
        EntityPropertiesBuilders.build(secPlan);
    }
}
@@ -1,67 +0,0 @@

[asyncio]
max_concurrent_tasks = 10

[dynamic_tenant_queues]
enabled = true

[metrics.prometheus]
enabled = true
prefix = "redactmanager_cv_analysis_service"

[tracing]
enabled = true
# possible values "opentelemetry" | "azure_monitor" (Expects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
type = "azure_monitor"

[tracing.opentelemetry]
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
service_name = "redactmanager_cv_analysis_service"
exporter = "otlp"

[webserver]
host = "0.0.0.0"
port = 8080

[rabbitmq]
host = "localhost"
port = 5672
username = ""
password = ""
heartbeat = 60
# Has to be a divisor of heartbeat, and shouldn't be too big, since only in these intervals queue interactions happen (like receiving new messages)
# This is also the minimum time the service needs to process a message
connection_sleep = 5
input_queue = "request_queue"
output_queue = "response_queue"
dead_letter_queue = "dead_letter_queue"

tenant_event_queue_suffix = "_tenant_event_queue"
tenant_event_dlq_suffix = "_tenant_events_dlq"
tenant_exchange_name = "tenants-exchange"
queue_expiration_time = 300000 # 5 minutes in milliseconds
service_request_queue_prefix = "cv_analysis_request_queue"
service_request_exchange_name = "cv_analysis_request_exchange"
service_response_exchange_name = "cv_analysis_response_exchange"
service_dlq_name = "cv_analysis_dlq"

[storage]
backend = "s3"

[storage.s3]
bucket = "redaction"
endpoint = "http://127.0.0.1:9000"
key = ""
secret = ""
region = "eu-central-1"

[storage.azure]
container = "redaction"
connection_string = ""

[storage.tenant_server]
public_key = ""
endpoint = "http://tenant-user-management:8081/internal-api/tenants"

[kubernetes]
pod_name = "test_pod"
@@ -1,19 +0,0 @@
[logging]
level = "INFO"
visual_logging_level = "DISABLED"
visual_logging_output_folder = "/tmp/debug"

[table_parsing]
skip_pages_without_images = true

[paths]
root = "@format {env[ROOT_PATH]}"
dvc_data_dir = "${paths.root}/data"
pdf_for_testing = "${paths.dvc_data_dir}/pdfs_for_testing"
png_for_testing = "${paths.dvc_data_dir}/pngs_for_testing"
png_figures_detected = "${paths.png_for_testing}/figures_detected"
png_tables_detected = "${paths.png_for_testing}/tables_detected_by_tp"
hashed_pdfs_for_testing = "${paths.pdf_for_testing}/hashed"
metadata_test_files = "${paths.dvc_data_dir}/metadata_testing_files.csv"
test_dir = "${paths.dvc_data_dir}/test"
test_data_dir = "${paths.dvc_data_dir}/test/test_data"
cv_analysis/config.py (30 lines, new file)
@@ -0,0 +1,30 @@
import os


def get_config():
    return Config()


class Config:
    def __init__(self):
        self.logging_level_root = os.environ.get("LOGGING_LEVEL_ROOT", "INFO")

        # visual_logging_level: NOTHING > INFO > DEBUG > ALL
        self.visual_logging_level = "DISABLED"
        self.visual_logging_output_folder = "/tmp/debug"

        # locations
        # FIXME: is everything here necessary?
        root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        self.dvc_data_dir = os.path.join(root, "data")
        self.pdf_for_testing = os.path.join(self.dvc_data_dir, "pdfs_for_testing")
        self.png_for_testing = os.path.join(self.dvc_data_dir, "pngs_for_testing")
        self.png_figures_detected = os.path.join(self.png_for_testing, "figures_detected")
        self.png_tables_detected = os.path.join(self.png_for_testing, "tables_detected_by_tp")
        self.hashed_pdfs_for_testing = os.path.join(self.pdf_for_testing, "hashed")
        self.metadata_test_files = os.path.join(self.dvc_data_dir, "metadata_testing_files.csv")
        self.test_dir = os.path.join(root, "test")
        self.test_data_dir = os.path.join(self.test_dir, "test_data")

    def __getitem__(self, key):
        return self.__getattribute__(key)
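As a quick illustration of the accessor shim at the bottom of this file (hypothetical session), `__getitem__` simply delegates to attribute lookup, so both access styles are interchangeable:

```python
from cv_analysis.config import get_config

cfg = get_config()
# Attribute access and item access resolve to the same value:
assert cfg.dvc_data_dir == cfg["dvc_data_dir"]
print(cfg.visual_logging_output_folder)  # "/tmp/debug"
```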
@@ -6,15 +6,15 @@ import numpy as np
from cv_analysis.figure_detection.figures import detect_large_coherent_structures
from cv_analysis.figure_detection.text import remove_primary_text_regions
from cv_analysis.utils.filters import (
    has_acceptable_format,
    is_large_enough,
    has_acceptable_format,
    is_not_too_large,
)
from cv_analysis.utils.postprocessing import remove_included
from cv_analysis.utils.structures import Rectangle


def detect_figures(image: np.ndarray):
def detect_figures(image: np.array):
    max_area = image.shape[0] * image.shape[1] * 0.99
    min_area = 5000
    max_width_to_height_ratio = 6
@@ -24,10 +24,9 @@ def detect_figures(image: np.ndarray):
    cnts = detect_large_coherent_structures(image)
    cnts = filter(figure_filter, cnts)

    # rects = map(compose(Rectangle.from_xywh, cv2.boundingRect), (cnts))

    bounding_rects = map(cv2.boundingRect, cnts)
    rects: list[Rectangle] = remove_included(map(Rectangle.from_xywh, rects))
    rects = map(cv2.boundingRect, cnts)
    rects = map(Rectangle.from_xywh, rects)
    rects = remove_included(rects)

    return rects
@@ -2,7 +2,7 @@ import cv2
import numpy as np


def detect_large_coherent_structures(image: np.ndarray):
def detect_large_coherent_structures(image: np.array):
    """Detects large coherent structures on an image.
    Expects an image with binary color space (e.g. threshold applied).
@@ -1,5 +1,5 @@
import itertools
from itertools import compress, starmap
from itertools import compress
from itertools import starmap
from operator import __and__

import cv2
@@ -7,12 +7,10 @@ import numpy as np

from cv_analysis.utils.connect_rects import connect_related_rects2
from cv_analysis.utils.postprocessing import (
    has_no_parent,
    remove_included,
    remove_overlapping,
    has_no_parent,
)
from cv_analysis.utils.structures import Rectangle
from cv_analysis.utils.visual_logging import vizlogger


# could be dynamic parameter if the scan is noisy
@@ -48,7 +46,7 @@ def fill_in_component_area(image, rect):
    return ~image


def parse_layout(image: np.ndarray):
def parse_layout(image: np.array):
    image = image.copy()
    image_ = image.copy()

@@ -77,7 +75,8 @@ def parse_layout(image: np.ndarray):
    rects = list(map(Rectangle.from_xywh, rects))
    rects = remove_included(rects)

    rects = connect_related_rects2(map(lambda r: r.xywh(), rects))
    rects = map(lambda r: r.xywh(), rects)
    rects = connect_related_rects2(rects)
    rects = list(map(Rectangle.from_xywh, rects))
    rects = remove_included(rects)
@@ -2,9 +2,10 @@ from functools import partial

import cv2
import numpy as np
from iteration_utilities import first, starfilter  # type: ignore
from iteration_utilities import first
from iteration_utilities._iteration_utilities import starfilter

from cv_analysis.utils.filters import is_boxy, is_filled, is_large_enough
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
from cv_analysis.utils.visual_logging import vizlogger


@@ -12,7 +13,7 @@ def is_likely_redaction(contour, hierarchy, min_area):
    return is_filled(hierarchy) and is_boxy(contour) and is_large_enough(contour, min_area)


def find_redactions(image: np.ndarray, min_normalized_area=200000):
def find_redactions(image: np.array, min_normalized_area=200000):
    vizlogger.debug(image, "redactions01_start.png")
    min_normalized_area /= 200  # Assumes 200 DPI PDF -> image conversion resolution

@@ -29,14 +30,13 @@ def find_redactions(image: np.ndarray, min_normalized_area=200000):
    contours, hierarchies = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

    try:
        return list(
            map(
                first,
                starfilter(
                    partial(is_likely_redaction, min_area=min_normalized_area),
                    zip(contours, hierarchies[0]),
                ),
            )
        contours = map(
            first,
            starfilter(
                partial(is_likely_redaction, min_area=min_normalized_area),
                zip(contours, hierarchies[0]),
            ),
        )
        return list(contours)
    except:
        return []
cv_analysis/server/pipeline.py (56 lines, new file)
@@ -0,0 +1,56 @@
from dataclasses import asdict
from operator import truth

from funcy import lmap, flatten

from cv_analysis.figure_detection.figure_detection import detect_figures
from cv_analysis.table_parsing import parse_tables
from cv_analysis.utils.structures import Rectangle
from pdf2img.conversion import convert_pages_to_images
from pdf2img.default_objects.image import ImagePlus, ImageInfo
from pdf2img.default_objects.rectangle import RectanglePlus


def get_analysis_pipeline(operation):
    if operation == "table":
        return make_analysis_pipeline(parse_tables, table_parsing_formatter, dpi=200)
    elif operation == "figure":
        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200)
    else:
        raise


def make_analysis_pipeline(analysis_fn, formatter, dpi):
    def analyse_pipeline(pdf: bytes, index=None):
        def parse_page(page: ImagePlus):
            image = page.asarray()
            rects = analysis_fn(image)
            if not rects:
                return
            infos = formatter(rects, page, dpi)
            return infos

        pages = convert_pages_to_images(pdf, index=index, dpi=dpi)
        results = map(parse_page, pages)

        yield from flatten(filter(truth, results))

    return analyse_pipeline


def table_parsing_formatter(rects, page: ImagePlus, dpi):
    def format_rect(rect: Rectangle):
        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
        return rect_plus.asdict(derotate=True)

    bboxes = lmap(format_rect, rects)

    return {"pageInfo": page.asdict(natural_index=True), "tableCells": bboxes}


def figure_detection_formatter(rects, page, dpi):
    def format_rect(rect: Rectangle):
        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
        return asdict(ImageInfo(page.info, rect_plus.asbbox(derotate=False), rect_plus.alpha))

    return lmap(format_rect, rects)
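A minimal sketch of driving this pipeline (hypothetical file name; assumes a PDF read as raw bytes, which is what analyse_pipeline expects):

```python
from cv_analysis.server.pipeline import get_analysis_pipeline

pipeline = get_analysis_pipeline("table")  # or "figure"

with open("document.pdf", "rb") as f:  # hypothetical input
    pdf_bytes = f.read()

# The pipeline is a generator: pages without detections are dropped and
# the remaining per-page results are flattened into one stream.
for result in pipeline(pdf_bytes):
    print(result)
```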
cv_analysis/table_parsing.py (135 lines, new file)
@@ -0,0 +1,135 @@
import cv2
import numpy as np
from funcy import lmap, lfilter

from cv_analysis.layout_parsing import parse_layout
from cv_analysis.utils.postprocessing import remove_isolated  # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
from cv_analysis.utils.structures import Rectangle
from cv_analysis.utils.visual_logging import vizlogger


def add_external_contours(image, image_h_w_lines_only):

    contours, _ = cv2.findContours(image_h_w_lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(image, (x, y), (x + w, y + h), 255, 1)

    return image


def apply_motion_blur(image: np.array, angle, size=80):
    """Solidifies and slightly extends detected lines.

    Args:
        image (np.array): page image as array
        angle: direction in which to apply blur, 0 or 90
        size (int): kernel size; 80 found empirically to work well

    Returns:
        np.array

    """
    k = np.zeros((size, size), dtype=np.float32)
    vizlogger.debug(k, "tables08_blur_kernel1.png")
    k[(size - 1) // 2, :] = np.ones(size, dtype=np.float32)
    vizlogger.debug(k, "tables09_blur_kernel2.png")
    k = cv2.warpAffine(
        k,
        cv2.getRotationMatrix2D((size / 2 - 0.5, size / 2 - 0.5), angle, 1.0),
        (size, size),
    )
    vizlogger.debug(k, "tables10_blur_kernel3.png")
    k = k * (1.0 / np.sum(k))
    vizlogger.debug(k, "tables11_blur_kernel4.png")
    blurred = cv2.filter2D(image, -1, k)
    return blurred


def isolate_vertical_and_horizontal_components(img_bin):
    """Identifies and reinforces horizontal and vertical lines in a binary image.

    Args:
        img_bin (np.array): array corresponding to single binarized page image
        bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables

    Returns:
        np.array
    """
    line_min_width = 48
    kernel_h = np.ones((1, line_min_width), np.uint8)
    kernel_v = np.ones((line_min_width, 1), np.uint8)

    img_bin_h = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_h)
    img_bin_v = cv2.morphologyEx(img_bin, cv2.MORPH_OPEN, kernel_v)
    img_lines_raw = img_bin_v | img_bin_h

    kernel_h = np.ones((1, 30), np.uint8)
    kernel_v = np.ones((30, 1), np.uint8)
    img_bin_h = cv2.dilate(img_bin_h, kernel_h, iterations=2)
    img_bin_v = cv2.dilate(img_bin_v, kernel_v, iterations=2)

    img_bin_h = apply_motion_blur(img_bin_h, 0)
    img_bin_v = apply_motion_blur(img_bin_v, 90)

    img_bin_extended = img_bin_h | img_bin_v

    th1, img_bin_extended = cv2.threshold(img_bin_extended, 120, 255, cv2.THRESH_BINARY)
    img_bin_final = cv2.dilate(img_bin_extended, np.ones((1, 1), np.uint8), iterations=1)
    # add contours before lines are extended by blurring
    img_bin_final = add_external_contours(img_bin_final, img_lines_raw)

    return img_bin_final


def find_table_layout_boxes(image: np.array):
    def is_large_enough(box):
        (x, y, w, h) = box
        if w * h >= 100000:
            return Rectangle.from_xywh(box)

    layout_boxes = parse_layout(image)
    a = lmap(is_large_enough, layout_boxes)
    return lmap(is_large_enough, layout_boxes)


def preprocess(image: np.array):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
    _, image = cv2.threshold(image, 195, 255, cv2.THRESH_BINARY)
    return ~image


def turn_connected_components_into_rects(image: np.array):
    def is_large_enough(stat):
        x1, y1, w, h, area = stat
        return area > 2000 and w > 35 and h > 25

    _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)

    stats = lfilter(is_large_enough, stats)
    if stats:
        stats = np.vstack(stats)
        return stats[:, :-1][2:]
    return []


def parse_tables(image: np.array, show=False):
    """Runs the full table parsing process.

    Args:
        image (np.array): single PDF page, converted to a numpy array

    Returns:
        list: list of rectangles corresponding to table cells
    """

    image = preprocess(image)
    image = isolate_vertical_and_horizontal_components(image)
    rects = turn_connected_components_into_rects(image)
    # print(rects, "\n\n")
    rects = list(map(Rectangle.from_xywh, rects))
    # print(rects, "\n\n")
    rects = remove_isolated(rects)
    # print(rects, "\n\n")

    return rects
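A hedged usage sketch for parse_tables (hypothetical image path; assumes a page scan at roughly 200 DPI, in line with the rest of this diff):

```python
import cv2

from cv_analysis.table_parsing import parse_tables

image = cv2.imread("page.png")  # color input is fine; preprocess() grayscales it
cells = parse_tables(image)     # Rectangle objects, one per detected table cell
for rect in cells:
    print(rect.xywh())
```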
@@ -1,13 +1,13 @@
def make_art():
    art = r"""
__
_ |@@|
__
_ |@@|
/ \ \--/ __ .__ .__
) O|----| | __ ___ __ _____ ____ _____ | | ___.__. _____|__| ______
/ / \ }{ /\ )_ / _\\ \/ / ______ \__ \ / \\__ \ | | | | |/ ___/ |/ ___/
)/ /\__/\ \__O (__ \ / /_____/ / __ \| | \/ __ \| |_\___ |\___ \| |\___ \
|/ (--/\--) \__/ \_/ (______/___|__(______/____/\____/_____/|__/_____/
/ _)( )(_
`---''---`
|/ (--/\--) \__/ \_/ (______/___|__(______/____/\____/_____/|__/_____/
/ _)( )(_
`---''---`
"""
    return art
@@ -1,4 +1,4 @@
from itertools import combinations, product, starmap
from itertools import combinations, starmap, product
from typing import Iterable


@@ -41,12 +41,7 @@ def has_correct_position1(rect_pair):
    return any(
        [
            any(
                [
                    abs(x1 - x2) <= 10,
                    abs(y1 - y2) <= 10,
                    abs(x1 + w1 - (x2 + w2)) <= 10,
                    abs(y1 + h1 - (y2 + h2)) <= 10,
                ]
                [abs(x1 - x2) <= 10, abs(y1 - y2) <= 10, abs(x1 + w1 - (x2 + w2)) <= 10, abs(y1 + h1 - (y2 + h2)) <= 10]
            ),
            any(
                [
@@ -1,13 +1,6 @@
import os

import cv2
from matplotlib import pyplot as plt

# if os.environ.get("USER") == "isaac":
#     import matplotlib

#     matplotlib.use("module://matplotlib-backend-wezterm")


def show_image_cv2(image, maxdim=700):
    h, w, c = image.shape
@@ -4,6 +4,7 @@ from cv_analysis.utils import copy_and_normalize_channels


def draw_contours(image, contours, color=None, annotate=False):

    image = copy_and_normalize_channels(image)

    for cont in contours:
@@ -1,11 +1,12 @@
import pdf2image
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_pdf(pdf, first_page=0, last_page=None):

    first_page += 1
    last_page = None if last_page is None else last_page + 1
@@ -1,28 +1,27 @@
from collections import namedtuple
from functools import partial
from itertools import compress, starmap
from typing import Iterable, List
from itertools import starmap, compress
from typing import Iterable

from cv_analysis.utils.structures import Rectangle


def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def remove_overlapping(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    def overlap(a: Rectangle, rect2: Rectangle) -> float:
        return a.intersection(rect2) > 0

    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> bool:
    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> list:
        return not any(overlap(rect, rect2) for rect2 in rectangles if not rect == rect2)

    rectangles = list(filter(partial(does_not_overlap, rectangles=rectangles), rectangles))
    return rectangles


def remove_included(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def remove_included(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
    return keep


def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    def is_connected(rect: Rectangle, rectangles: Iterable[Rectangle]):
        return any(rect.adjacent(rect2) for rect2 in rectangles if not rect == rect2)

@@ -30,7 +29,7 @@ def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
    return rectangles


def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> list[Rectangle]:
    def is_connected(left, center, right):
        return any([left.adjacent(center), center.adjacent(right)])

@@ -43,7 +42,7 @@ def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
    return rectangles


def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> List[Rectangle]:
def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> list[Rectangle]:
    return (__remove_isolated_unsorted if input_unsorted else __remove_isolated_sorted)(rectangles)
@@ -1,5 +1,5 @@
import cv2
from numpy import frombuffer, ndarray
import cv2


def preprocess_page_array(page):
@@ -10,6 +10,7 @@ def preprocess_page_array(page):


def page2image(page):

    if type(page) == bytes:
        page = frombuffer(page)
    elif type(page) == ndarray:
@@ -1,23 +1,12 @@
 from json import dumps
-from typing import Iterable

+from typing import Iterable
 import numpy as np
-from funcy import identity  # type: ignore
+from funcy import identity


 class Rectangle:
-    def __init__(
-        self,
-        x1=None,
-        y1=None,
-        w=None,
-        h=None,
-        x2=None,
-        y2=None,
-        indent=4,
-        format="xywh",
-        discrete=True,
-    ):
+    def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh", discrete=True):
         make_discrete = int if discrete else identity

         try:
@@ -122,13 +111,7 @@ class Rectangle:

     @classmethod
     def from_dict_xywh(cls, xywh_dict, discrete=True):
-        return cls(
-            x1=xywh_dict["x"],
-            y1=xywh_dict["y"],
-            w=xywh_dict["width"],
-            h=xywh_dict["height"],
-            discrete=discrete,
-        )
+        return cls(x1=xywh_dict["x"], y1=xywh_dict["y"], w=xywh_dict["width"], h=xywh_dict["height"], discrete=discrete)

     def __str__(self):
         return dumps(self.json(), indent=self.indent)
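The alternate constructor in this hunk maps the dictionary keys x, y, width, and height onto the rectangle's geometry. A small sketch, assuming the package is installed:

```python
from cv_analysis.utils.structures import Rectangle

rect = Rectangle.from_dict_xywh({"x": 55, "y": 247, "width": 503, "height": 351})
print(rect)  # __str__ dumps the rectangle's JSON form with the configured indent
```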
@@ -1,7 +1,5 @@
 from typing import Iterable
-
 import numpy as np
-
 from cv_analysis.utils.structures import Rectangle

@@ -28,6 +26,7 @@ def compute_page_iou(results_boxes: Iterable[Rectangle], ground_truth_boxes: Iterable[Rectangle]):


 def compute_document_score(results_dict, annotation_dict):
     page_weights = np.array([len(page["cells"]) for page in annotation_dict["pages"]])
     page_weights = page_weights / sum(page_weights)
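The two lines above weight each page by its share of annotated cells, so the document score becomes a weighted average of per-page scores. The same arithmetic in isolation:

```python
import numpy as np

cells_per_page = [4, 16, 0]             # e.g. len(page["cells"]) for each page
page_weights = np.array(cells_per_page)
page_weights = page_weights / sum(page_weights)
print(page_weights)                     # [0.2 0.8 0. ]; the weights sum to 1
```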
@@ -1,8 +1,9 @@
-import cv2
 from numpy import generic
+import cv2


 def copy_and_normalize_channels(image):
     image = image.copy()
     try:
         image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
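The try/except around cvtColor normalizes single-channel input to three channels so that later drawing in color cannot fail. The core conversion in isolation, using cv2 and numpy directly:

```python
import cv2
import numpy as np

gray = np.zeros((4, 4), dtype=np.uint8)       # single-channel (grayscale) image
bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)  # gray value replicated to B, G, R
print(gray.shape, bgr.shape)                  # (4, 4) (4, 4, 3)
```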
@@ -1,11 +1,9 @@
 import os

-from pyinfra.config.loader import load_settings  # type: ignore
-
 from cv_analysis.config import get_config
 from cv_analysis.utils.display import save_image

-settings = get_config()
+CV_CONFIG = get_config()


 class VisualLogger:
@@ -41,4 +39,4 @@ class VisualLogger:
         return self.level == "ALL"


-vizlogger = VisualLogger(settings.logging.visual_logging_level, settings.logging.visual_logging_output_folder)
+vizlogger = VisualLogger(CV_CONFIG.visual_logging_level, CV_CONFIG.visual_logging_output_folder)
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large.
Binary file not shown.
@@ -1,30 +0,0 @@
#!/bin/bash
python_version=$1
gitlab_user=$2
gitlab_personal_access_token=$3

# cookiecutter https://gitlab.knecon.com/knecon/research/template-python-project.git --checkout master
# latest_dir=$(ls -td -- */ | head -n 1)  # should be the dir cookiecutter just created
# cd $latest_dir

pyenv install $python_version
pyenv local $python_version
pyenv shell $python_version

pip install --upgrade pip
pip install poetry

poetry config installer.max-workers 10
# research package registry
poetry config repositories.gitlab-research https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
poetry config http-basic.gitlab-research ${gitlab_user} ${gitlab_personal_access_token}
# redactmanager package registry
poetry config repositories.gitlab-red https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
poetry config http-basic.gitlab-red ${gitlab_user} ${gitlab_personal_access_token}

poetry env use $(pyenv which python)
poetry install --with=dev
poetry update

source .venv/bin/activate
@@ -28,4 +28,4 @@ services:
     volumes:
       - /opt/bitnami/rabbitmq/.rabbitmq/:/data/bitnami
 volumes:
-  mdata:
+  mdata:
4 docs/build/html/.buildinfo vendored

@@ -1,4 +0,0 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 04e9c6c5d3e412413c2949e598da60dc
tags: 645f666f9bcd5a90fca523b33c5a78b7
BIN docs/build/html/.doctrees/README.doctree vendored
Binary file not shown.
BIN docs/build/html/.doctrees/environment.pickle vendored
Binary file not shown.
BIN docs/build/html/.doctrees/index.doctree vendored
Binary file not shown.
BIN docs/build/html/.doctrees/modules/serve.doctree vendored
Binary file not shown.
657 docs/build/html/README.html vendored

@@ -1,657 +0,0 @@
[Deleted Sphinx-built HTML page: "cv-analysis - Visual (CV-Based) Document Parsing — CV Analysis Service 2.5.2 documentation" (PyData Sphinx Theme 0.15.2, Sphinx 7.3.7). Theme markup omitted; the page content matches docs/build/html/_sources/README.md.txt below.]
BIN docs/build/html/_images/figure_detection.png vendored
Binary file not shown. (Before: 707 KiB)
BIN docs/build/html/_images/layout_parsing.png vendored
Binary file not shown. (Before: 568 KiB)
BIN docs/build/html/_images/redaction_detection.png vendored
Binary file not shown. (Before: 3.2 MiB)
BIN docs/build/html/_images/table_parsing.png vendored
Binary file not shown. (Before: 566 KiB)
178 docs/build/html/_sources/README.md.txt vendored

@@ -1,178 +0,0 @@
# cv-analysis - Visual (CV-Based) Document Parsing

This repository implements computer-vision-based approaches for detecting and parsing visual features, such as tables or previous redactions, in documents.

## API

Input message:

```json
{
    "targetFilePath": {
        "pdf": "absolute file path",
        "vlp_output": "absolute file path"
    },
    "responseFilePath": "absolute file path",
    "operation": "table_image_inference"
}
```

The response is uploaded to storage at the path specified in the `responseFilePath` field. The structure is as follows:

```json
{
    ...,
    "data": [
        {
            "pageNum": 0,
            "bbox": {
                "x1": 55.3407,
                "y1": 247.0246,
                "x2": 558.5602,
                "y2": 598.0585
            },
            "uuid": "2b10c1a2-393c-4fca-b9e3-0ad5b774ac84",
            "label": "table",
            "tableLines": [
                {
                    "x1": 0,
                    "y1": 16,
                    "x2": 1399,
                    "y2": 16
                },
                ...
            ],
            "imageInfo": {
                "height": 693,
                "width": 1414
            }
        },
        ...
    ]
}
```

## Installation

```bash
git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git
cd cv-analysis

python -m venv env
source env/bin/activate

pip install -e .
pip install -r requirements.txt

dvc pull
```

## Usage

### As an API

The module provides functions for the individual tasks; each returns some kind of collection of points, depending on the specific task.

#### Redaction Detection (API)

The snippet below shows how to find the outlines of previous redactions.

```python
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import numpy as np

pdf_path = ...
page_index = ...

page = pdf2image.convert_from_path(pdf_path, first_page=page_index, last_page=page_index)[0]
page = np.array(page)

redaction_contours = find_redactions(page)
```

## As a CLI Tool

Core API functionality can also be used through a CLI.

### Table Parsing

The table parsing utility detects tables and segments them into individual cells.

```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type table
```

The image below shows a parsed table in which each cell has been detected individually.



### Redaction Detection (CLI)

The redaction detection utility detects previous redactions (filled black rectangles) in PDFs.

```bash
python scripts/annotate.py data/test_pdf.pdf 2 --type redaction
```

The image below shows the detected redactions with green outlines.



### Layout Parsing

The layout parsing utility detects elements such as paragraphs, tables, and figures.

```bash
python scripts/annotate.py data/test_pdf.pdf 7 --type layout
```

The image below shows the detected layout elements on a page.



### Figure Detection

The figure detection utility specifically targets figures, which can be missed by the generic layout parsing utility.

```bash
python scripts/annotate.py data/test_pdf.pdf 3 --type figure
```

The image below shows the detected figure on a page.



## Running as a service

### Building

Build the base image:

```bash
bash setup/docker.sh
```

Build the head image:

```bash
docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT=""
```

### Usage (service)

Shell 1:

```bash
docker run --rm --net=host cv-analysis
```

Shell 2:

```bash
python scripts/client_mock.py --pdf_path /path/to/a/pdf
```
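The deleted README above still documents the service's message contract, so a minimal client sketch may be useful. The field names come from the README; the paths, the storage layout, and how messages reach the service are assumptions and deployment-specific:

```python
import json

# Hypothetical absolute paths on the shared storage.
message = {
    "targetFilePath": {
        "pdf": "/data/in/document.pdf",
        "vlp_output": "/data/in/document_vlp.json",
    },
    "responseFilePath": "/data/out/document_tables.json",
    "operation": "table_image_inference",
}
print(json.dumps(message, indent=4))

# Once the service has written its result to responseFilePath:
# with open(message["responseFilePath"]) as f:
#     response = json.load(f)
# for item in response["data"]:
#     print(item["pageNum"], item["label"], item["bbox"])
```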
37 docs/build/html/_sources/index.rst.txt vendored

@@ -1,37 +0,0 @@
.. Keyword Extraction Service documentation master file, created by
   sphinx-quickstart on Mon Sep 12 12:04:24 2022.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

=============================================
Welcome to CV Analysis Service documentation!
=============================================

.. note::

   If you'd like to change the looks of things 👉 https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html


Table of Contents
-----------------

.. toctree::
   :maxdepth: 3
   :caption: README

   README.md

.. toctree::
   :maxdepth: 3
   :caption: Modules

   modules/cv_analysis
   modules/serve


Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
@@ -1,7 +0,0 @@
cv\_analysis.config module
==========================

.. automodule:: cv_analysis.config
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.figure\_detection module
=======================================================

.. automodule:: cv_analysis.figure_detection.figure_detection
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.figures module
=============================================

.. automodule:: cv_analysis.figure_detection.figures
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,17 +0,0 @@
cv\_analysis.figure\_detection package
======================================

.. automodule:: cv_analysis.figure_detection
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   cv_analysis.figure_detection.figure_detection
   cv_analysis.figure_detection.figures
   cv_analysis.figure_detection.text
@@ -1,7 +0,0 @@
cv\_analysis.figure\_detection.text module
==========================================

.. automodule:: cv_analysis.figure_detection.text
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.layout\_parsing module
===================================

.. automodule:: cv_analysis.layout_parsing
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.locations module
=============================

.. automodule:: cv_analysis.locations
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,7 +0,0 @@
cv\_analysis.redaction\_detection module
========================================

.. automodule:: cv_analysis.redaction_detection
   :members:
   :undoc-members:
   :show-inheritance:
@@ -1,30 +0,0 @@
cv\_analysis package
====================

.. automodule:: cv_analysis
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   cv_analysis.figure_detection
   cv_analysis.server
   cv_analysis.utils

Submodules
----------

.. toctree::
   :maxdepth: 4

   cv_analysis.config
   cv_analysis.layout_parsing
   cv_analysis.locations
   cv_analysis.redaction_detection
   cv_analysis.table_inference
   cv_analysis.table_parsing
Some files were not shown because too many files have changed in this diff.