change name from vidocp to cv-analysis
This commit is contained in:
parent
addacf9ed6
commit
8730b34018
@ -9,7 +9,7 @@ omit =
|
||||
*/setup.py
|
||||
*/build_venv/*
|
||||
source =
|
||||
vidocp
|
||||
cv-analysis
|
||||
relative_files = True
|
||||
data_file = .coverage
|
||||
|
||||
|
||||
8
.gitignore
vendored
8
.gitignore
vendored
@ -17,7 +17,7 @@ build_venv/
|
||||
/table_parsing.egg-info
|
||||
/target/
|
||||
/tests/
|
||||
/vidocp.egg-info/dependency_links.txt
|
||||
/vidocp.egg-info/PKG-INFO
|
||||
/vidocp.egg-info/SOURCES.txt
|
||||
/vidocp.egg-info/top_level.txt
|
||||
/cv-analysis.egg-info/dependency_links.txt
|
||||
/cv-analysis.egg-info/PKG-INFO
|
||||
/cv-analysis.egg-info/SOURCES.txt
|
||||
/cv-analysis.egg-info/top_level.txt
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
ARG BASE_ROOT="nexus.iqser.com:5001/red/"
|
||||
ARG VERSION_TAG=latest
|
||||
|
||||
FROM ${BASE_ROOT}vidocp-base:${VERSION_TAG}
|
||||
FROM ${BASE_ROOT}cv-analysis-base:${VERSION_TAG}
|
||||
|
||||
WORKDIR /app/service
|
||||
|
||||
COPY ./src ./src
|
||||
COPY vidocp ./vidocp
|
||||
COPY cv-analysis ./cv-analysis
|
||||
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
RUN python3 -m pip install -e .
|
||||
|
||||
12
README.md
12
README.md
@ -1,4 +1,4 @@
|
||||
# Vidocp — Visual Document Parsing
|
||||
# cv-analysis — Visual Document Parsing
|
||||
|
||||
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
|
||||
previous redactions in documents.
|
||||
@ -6,8 +6,8 @@ previous redactions in documents.
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
git clone ssh://git@git.iqser.com:2222/rr/vidocp.git
|
||||
cd vidocp
|
||||
git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git
|
||||
cd cv-analysis
|
||||
|
||||
python -m venv env
|
||||
source env/bin/activate
|
||||
@ -30,7 +30,7 @@ the specific task.
|
||||
The below snippet shows how to find the outlines of previous redactions.
|
||||
|
||||
```python
|
||||
from vidocp.redaction_detection import find_redactions
|
||||
from cv_analysis.redaction_detection import find_redactions
|
||||
import pdf2image
|
||||
import numpy as np
|
||||
|
||||
@ -109,7 +109,7 @@ bash setup/docker.sh
|
||||
Build head image
|
||||
|
||||
```bash
|
||||
docker build -f Dockerfile -t vidocp . --build-arg BASE_ROOT=""
|
||||
docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT=""
|
||||
```
|
||||
|
||||
### Usage (service)
|
||||
@ -117,7 +117,7 @@ docker build -f Dockerfile -t vidocp . --build-arg BASE_ROOT=""
|
||||
Shell 1
|
||||
|
||||
```bash
|
||||
docker run --rm --net=host --rm vidocp
|
||||
docker run --rm --net=host --rm cv-analysis
|
||||
```
|
||||
|
||||
Shell 2
|
||||
|
||||
@ -33,8 +33,8 @@ import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
|
||||
@BambooSpec
|
||||
public class PlanSpec {
|
||||
|
||||
private static final String SERVICE_NAME = "vidocp";
|
||||
private static final String SERVICE_NAME_BASE = "vidocp-base";
|
||||
private static final String SERVICE_NAME = "cv-analysis";
|
||||
private static final String SERVICE_NAME_BASE = "cv-analysis-base";
|
||||
|
||||
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
|
||||
|
||||
@ -72,7 +72,7 @@ public class PlanSpec {
|
||||
return new Plan(
|
||||
project(),
|
||||
SERVICE_NAME, new BambooKey(SERVICE_KEY))
|
||||
.description("Docker build for vidocp.")
|
||||
.description("Docker build for cv-analysis.")
|
||||
// .variables()
|
||||
.stages(new Stage("Build Stage")
|
||||
.jobs(
|
||||
|
||||
@ -35,7 +35,7 @@ then
|
||||
echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
|
||||
/usr/bin/sonar-scanner/bin/sonar-scanner -X\
|
||||
-Dsonar.projectKey=RED_$SERVICE_NAME \
|
||||
-Dsonar.sources=src,vidocp \
|
||||
-Dsonar.sources=src,cv_analysis \
|
||||
-Dsonar.host.url=https://sonarqube.iqser.com \
|
||||
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
|
||||
-Dsonar.branch.name=${bamboo_planRepository_1_branch} \
|
||||
@ -48,7 +48,7 @@ else
|
||||
echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
|
||||
/usr/bin/sonar-scanner/bin/sonar-scanner \
|
||||
-Dsonar.projectKey=RED_$SERVICE_NAME \
|
||||
-Dsonar.sources=src,vidocp \
|
||||
-Dsonar.sources=src,cv_analysis \
|
||||
-Dsonar.host.url=https://sonarqube.iqser.com \
|
||||
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
|
||||
-Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
|
||||
|
||||
Binary file not shown.
@ -16,11 +16,11 @@ pip install -r requirements.txt
|
||||
echo "DVC pull step"
|
||||
dvc pull
|
||||
|
||||
# echo "coverage calculation"
|
||||
# coverage run -m pytest --ignore=tests
|
||||
# echo "coverage report generation"
|
||||
# coverage report -m
|
||||
# coverage xml
|
||||
echo "coverage calculation"
|
||||
coverage run -m pytest
|
||||
echo "coverage report generation"
|
||||
coverage report -m
|
||||
coverage xml
|
||||
|
||||
SERVICE_NAME=$1
|
||||
|
||||
@ -35,7 +35,7 @@ then
|
||||
echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
|
||||
/usr/bin/sonar-scanner/bin/sonar-scanner -X\
|
||||
-Dsonar.projectKey=RED_$SERVICE_NAME \
|
||||
-Dsonar.sources=src,vidocp \
|
||||
-Dsonar.sources=src,cv_analysis \
|
||||
-Dsonar.host.url=https://sonarqube.iqser.com \
|
||||
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
|
||||
-Dsonar.branch.name=${bamboo_planRepository_1_branch} \
|
||||
@ -48,7 +48,7 @@ else
|
||||
echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
|
||||
/usr/bin/sonar-scanner/bin/sonar-scanner \
|
||||
-Dsonar.projectKey=RED_$SERVICE_NAME \
|
||||
-Dsonar.sources=src,vidocp \
|
||||
-Dsonar.sources=src,cv_analysis \
|
||||
-Dsonar.host.url=https://sonarqube.iqser.com \
|
||||
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
|
||||
-Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
|
||||
from envyaml import EnvYAML
|
||||
from vidocp.locations import CONFIG_FILE
|
||||
from cv_analysis.locations import CONFIG_FILE
|
||||
|
||||
|
||||
def _get_item_and_maybe_make_dotindexable(container, item):
|
||||
@ -2,12 +2,12 @@ import cv2
|
||||
import numpy as np
|
||||
from pdf2image import pdf2image
|
||||
|
||||
from vidocp.utils.detection import detect_large_coherent_structures
|
||||
from vidocp.utils.display import show_mpl
|
||||
from vidocp.utils.draw import draw_rectangles
|
||||
from vidocp.utils.post_processing import remove_included
|
||||
from vidocp.utils.filters import is_large_enough, has_acceptable_format
|
||||
from vidocp.utils.text import remove_primary_text_regions
|
||||
from cv_analysis.utils.detection import detect_large_coherent_structures
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from cv_analysis.utils.post_processing import remove_included
|
||||
from cv_analysis.utils.filters import is_large_enough, has_acceptable_format
|
||||
from cv_analysis.utils.text import remove_primary_text_regions
|
||||
|
||||
|
||||
def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6):
|
||||
@ -6,9 +6,9 @@ import cv2
|
||||
import numpy as np
|
||||
from pdf2image import pdf2image
|
||||
|
||||
from vidocp.utils.display import show_mpl
|
||||
from vidocp.utils.draw import draw_rectangles
|
||||
from vidocp.utils.post_processing import remove_overlapping, remove_included, has_no_parent
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from cv_analysis.utils.post_processing import remove_overlapping, remove_included, has_no_parent
|
||||
|
||||
|
||||
def is_likely_segment(rect, min_area=100):
|
||||
@ -5,9 +5,9 @@ import numpy as np
|
||||
import pdf2image
|
||||
from iteration_utilities import starfilter, first
|
||||
|
||||
from vidocp.utils.display import show_mpl
|
||||
from vidocp.utils.draw import draw_contours
|
||||
from vidocp.utils.filters import is_large_enough, is_filled, is_boxy
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from cv_analysis.utils.draw import draw_contours
|
||||
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
|
||||
|
||||
|
||||
def is_likely_redaction(contour, hierarchy, min_area):
|
||||
@ -6,11 +6,11 @@ import cv2
|
||||
import numpy as np
|
||||
from pdf2image import pdf2image
|
||||
|
||||
from vidocp.utils.display import show_mpl
|
||||
from vidocp.utils.draw import draw_rectangles
|
||||
from vidocp.utils.post_processing import xywh_to_vecs, xywh_to_vec_rect, adjacent1d, remove_isolated
|
||||
from vidocp.utils.deskew import deskew_histbased
|
||||
from vidocp.layout_parsing import parse_layout
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from cv_analysis.utils.post_processing import xywh_to_vecs, xywh_to_vec_rect, adjacent1d, remove_isolated
|
||||
from cv_analysis.utils.deskew import deskew_histbased
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
|
||||
|
||||
def add_external_contours(image, img):
|
||||
4
cv_analysis/test/config.py
Normal file
4
cv_analysis/test/config.py
Normal file
@ -0,0 +1,4 @@
|
||||
from cv_analysis.config import Config
|
||||
from cv_analysis.locations import TEST_CONFIG_FILE
|
||||
|
||||
TEST_CONFIG = Config(TEST_CONFIG_FILE)
|
||||
|
Before Width: | Height: | Size: 215 KiB After Width: | Height: | Size: 215 KiB |
@ -1,4 +1,4 @@
|
||||
from vidocp.config import CONFIG
|
||||
from cv_analysis.config import CONFIG
|
||||
|
||||
|
||||
def test_config():
|
||||
@ -1,11 +1,11 @@
|
||||
from os.path import join
|
||||
import json
|
||||
|
||||
from vidocp.table_parsing import parse_table
|
||||
from vidocp.locations import TEST_DATA_DIR
|
||||
from vidocp.test.config import TEST_CONFIG
|
||||
from vidocp.utils.test_metrics import compute_document_score
|
||||
from vidocp.utils.preprocessing import open_pdf
|
||||
from cv_analysis.table_parsing import parse_table
|
||||
from cv_analysis.locations import TEST_DATA_DIR
|
||||
from cv_analysis.test.config import TEST_CONFIG
|
||||
from cv_analysis.utils.test_metrics import compute_document_score
|
||||
from cv_analysis.utils.preprocessing import open_pdf
|
||||
|
||||
|
||||
def test_table_parsing():
|
||||
@ -2,7 +2,7 @@ import numpy as np
|
||||
from scipy.ndimage import rotate as rotate_
|
||||
import cv2
|
||||
|
||||
from vidocp.config import CONFIG
|
||||
from cv_analysis.config import CONFIG
|
||||
|
||||
|
||||
def rotate_straight(im: np.array, skew_angle: int) -> np.array:
|
||||
@ -1,6 +1,6 @@
|
||||
import cv2
|
||||
|
||||
from vidocp.utils import copy_and_normalize_channels
|
||||
from cv_analysis.utils import copy_and_normalize_channels
|
||||
|
||||
|
||||
def draw_contours(image, contours):
|
||||
@ -2,7 +2,7 @@
|
||||
import sys
|
||||
import logging
|
||||
|
||||
from vidocp.config import CONFIG
|
||||
from cv_analysis.config import CONFIG
|
||||
|
||||
|
||||
def get_logger():
|
||||
@ -3,7 +3,7 @@ import pdf2image
|
||||
from PIL import Image
|
||||
import cv2
|
||||
|
||||
from vidocp.utils.deskew import deskew
|
||||
from cv_analysis.utils.deskew import deskew
|
||||
|
||||
|
||||
def preprocess_pdf_image(page):
|
||||
@ -1,9 +1,9 @@
|
||||
import argparse
|
||||
|
||||
from vidocp.table_parsing import annotate_tables_in_pdf
|
||||
from vidocp.redaction_detection import annotate_redactions_in_pdf
|
||||
from vidocp.layout_parsing import annotate_layout_in_pdf
|
||||
from vidocp.figure_detection import detect_figures_in_pdf
|
||||
from cv_analysis.table_parsing import annotate_tables_in_pdf
|
||||
from cv_analysis.redaction_detection import annotate_redactions_in_pdf
|
||||
from cv_analysis.layout_parsing import annotate_layout_in_pdf
|
||||
from cv_analysis.figure_detection import detect_figures_in_pdf
|
||||
|
||||
|
||||
def parse_args():
|
||||
|
||||
@ -4,7 +4,7 @@ import json
|
||||
from multiprocessing.sharedctypes import Value
|
||||
import requests
|
||||
|
||||
from vidocp.utils.preprocessing import open_pdf
|
||||
from cv_analysis.utils.preprocessing import open_pdf
|
||||
|
||||
|
||||
def parse_args():
|
||||
|
||||
@ -4,10 +4,10 @@ import numpy as np
|
||||
import pdf2image
|
||||
from PIL import Image
|
||||
|
||||
from vidocp.utils.deskew import deskew_histbased # , deskew_linebased
|
||||
from vidocp.utils.display import show_mpl
|
||||
from vidocp.utils.draw import draw_stats
|
||||
from vidocp.table_parsing import parse_table
|
||||
from cv_analysis.utils.deskew import deskew_histbased # , deskew_linebased
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from cv_analysis.utils.draw import draw_stats
|
||||
from cv_analysis.table_parsing import parse_table
|
||||
|
||||
|
||||
def parse_args():
|
||||
|
||||
4
setup.py
4
setup.py
@ -3,11 +3,11 @@
|
||||
from distutils.core import setup
|
||||
|
||||
setup(
|
||||
name="vidocp",
|
||||
name="cv_analysis",
|
||||
version="0.0.1",
|
||||
description="",
|
||||
author="",
|
||||
author_email="",
|
||||
url="",
|
||||
packages=["vidocp"],
|
||||
packages=["cv_analysis"],
|
||||
)
|
||||
|
||||
@ -9,5 +9,5 @@ python3 -m pip install --upgrade pip
|
||||
#pip install 'dvc[ssh]'
|
||||
#dvc pull
|
||||
|
||||
docker build -f Dockerfile_base -t vidocp-base .
|
||||
docker build -f Dockerfile -t vidocp .
|
||||
docker build -f Dockerfile_base -t cv-analysis-base .
|
||||
docker build -f Dockerfile -t cv-analysis .
|
||||
@ -8,15 +8,15 @@ from prometheus_client import Counter, Gauge
|
||||
from prometheus_flask_exporter import PrometheusMetrics
|
||||
from waitress import serve
|
||||
|
||||
from vidocp.utils import npconvert
|
||||
from vidocp.utils.preprocessing import preprocess_pdf_image # TODO
|
||||
from vidocp.table_parsing import parse_table # , detect_tables_in_pdf
|
||||
from vidocp.redaction_detection import find_redactions # , detect_redactions_in_pdf
|
||||
from vidocp.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO
|
||||
from vidocp.figure_detection import detect_figures # , detect_figures_in_pdf #TODO
|
||||
from vidocp.utils.logging import logger
|
||||
from vidocp.utils.preprocessing import open_pdf
|
||||
from vidocp.config import CONFIG
|
||||
from cv_analysis.utils import npconvert
|
||||
from cv_analysis.utils.preprocessing import preprocess_pdf_image # TODO
|
||||
from cv_analysis.table_parsing import parse_table # , detect_tables_in_pdf
|
||||
from cv_analysis.redaction_detection import find_redactions # , detect_redactions_in_pdf
|
||||
from cv_analysis.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO
|
||||
from cv_analysis.figure_detection import detect_figures # , detect_figures_in_pdf #TODO
|
||||
from cv_analysis.utils.logging import logger
|
||||
from cv_analysis.utils.preprocessing import open_pdf
|
||||
from cv_analysis.config import CONFIG
|
||||
|
||||
|
||||
def suppress_user_warnings():
|
||||
@ -30,9 +30,9 @@ def main():
|
||||
|
||||
|
||||
def run_server():
|
||||
file_counter = Counter("vidocp_file_counter", "count processed files")
|
||||
# page_counter = Counter("vidocp_page_counter", "count pages from processed files")
|
||||
ram_metric = Gauge("vidocp_memory_usage", "Memory usage in Mb")
|
||||
file_counter = Counter("cv_analysis_file_counter", "count processed files")
|
||||
# page_counter = Counter("cv_analysis_page_counter", "count pages from processed files")
|
||||
ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")
|
||||
|
||||
def start_monitoring():
|
||||
file_counter.inc()
|
||||
@ -146,19 +146,19 @@ def annotate(task):
|
||||
|
||||
|
||||
def make_art():
|
||||
art = """
|
||||
|
||||
=================================================================================================
|
||||
== ==== ============== ================= ==========================================
|
||||
== ==== ============== ==== ================ ==== =========================================
|
||||
== ==== ============== ==== ================ ==== =========================================
|
||||
== ==== == == == ==== === ==== === ==== === === = ==== ==== === = ===
|
||||
== == ========== == ==== == == = == === = == = == = == = == = ==
|
||||
=== == === ===== === ==== == = == ===== =========== == ======== ==== == =======
|
||||
=== == === ==== ==== ==== == = == ===== ========= == ========= === ===== =======
|
||||
==== ==== === ===== ==== == = == = == ======== = == ======= = == = == =======
|
||||
===== ===== == == ==== ==== === ========= == ======== ==== === =======
|
||||
=================================================================================================
|
||||
art = r"""
|
||||
__ __
|
||||
| \ | \
|
||||
_______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______
|
||||
/ \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \
|
||||
| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$
|
||||
| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \
|
||||
| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\
|
||||
\$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$
|
||||
\$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$
|
||||
| \__| $$
|
||||
\$$ $$
|
||||
\$$$$$$
|
||||
|
||||
"""
|
||||
return art
|
||||
|
||||
@ -1,4 +0,0 @@
|
||||
from vidocp.config import Config
|
||||
from vidocp.locations import TEST_CONFIG_FILE
|
||||
|
||||
TEST_CONFIG = Config(TEST_CONFIG_FILE)
|
||||
Loading…
x
Reference in New Issue
Block a user