change name from vidocp to cv-analysis

This commit is contained in:
Isaac Riley 2022-03-23 13:46:57 +01:00
parent addacf9ed6
commit 8730b34018
42 changed files with 95 additions and 95 deletions

View File

@ -9,7 +9,7 @@ omit =
*/setup.py
*/build_venv/*
source =
vidocp
cv-analysis
relative_files = True
data_file = .coverage

8
.gitignore vendored
View File

@ -17,7 +17,7 @@ build_venv/
/table_parsing.egg-info
/target/
/tests/
/vidocp.egg-info/dependency_links.txt
/vidocp.egg-info/PKG-INFO
/vidocp.egg-info/SOURCES.txt
/vidocp.egg-info/top_level.txt
/cv-analysis.egg-info/dependency_links.txt
/cv-analysis.egg-info/PKG-INFO
/cv-analysis.egg-info/SOURCES.txt
/cv-analysis.egg-info/top_level.txt

View File

@ -1,12 +1,12 @@
ARG BASE_ROOT="nexus.iqser.com:5001/red/"
ARG VERSION_TAG=latest
FROM ${BASE_ROOT}vidocp-base:${VERSION_TAG}
FROM ${BASE_ROOT}cv-analysis-base:${VERSION_TAG}
WORKDIR /app/service
COPY ./src ./src
COPY vidocp ./vidocp
COPY cv-analysis ./cv-analysis
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install -e .

View File

@ -1,4 +1,4 @@
# Vidocp — Visual Document Parsing
# cv-analysis — Visual Document Parsing
This repository implements computer vision based approaches for detecting and parsing visual features such as tables or
previous redactions in documents.
@ -6,8 +6,8 @@ previous redactions in documents.
## Installation
```bash
git clone ssh://git@git.iqser.com:2222/rr/vidocp.git
cd vidocp
git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git
cd cv-analysis
python -m venv env
source env/bin/activate
@ -30,7 +30,7 @@ the specific task.
The snippet below shows how to find the outlines of previous redactions.
```python
from vidocp.redaction_detection import find_redactions
from cv_analysis.redaction_detection import find_redactions
import pdf2image
import numpy as np
@ -109,7 +109,7 @@ bash setup/docker.sh
Build head image
```bash
docker build -f Dockerfile -t vidocp . --build-arg BASE_ROOT=""
docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT=""
```
### Usage (service)
@ -117,7 +117,7 @@ docker build -f Dockerfile -t vidocp . --build-arg BASE_ROOT=""
Shell 1
```bash
docker run --rm --net=host --rm vidocp
docker run --rm --net=host --rm cv-analysis
```
Shell 2

View File

@ -33,8 +33,8 @@ import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
@BambooSpec
public class PlanSpec {
private static final String SERVICE_NAME = "vidocp";
private static final String SERVICE_NAME_BASE = "vidocp-base";
private static final String SERVICE_NAME = "cv-analysis";
private static final String SERVICE_NAME_BASE = "cv-analysis-base";
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
@ -72,7 +72,7 @@ public class PlanSpec {
return new Plan(
project(),
SERVICE_NAME, new BambooKey(SERVICE_KEY))
.description("Docker build for vidocp.")
.description("Docker build for cv-analysis.")
// .variables()
.stages(new Stage("Build Stage")
.jobs(

View File

@ -35,7 +35,7 @@ then
echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
/usr/bin/sonar-scanner/bin/sonar-scanner -X\
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=src,vidocp \
-Dsonar.sources=src,cv_analysis \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.branch.name=${bamboo_planRepository_1_branch} \
@ -48,7 +48,7 @@ else
echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
/usr/bin/sonar-scanner/bin/sonar-scanner \
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=src,vidocp \
-Dsonar.sources=src,cv_analysis \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.pullrequest.key=${bamboo_repository_pr_key} \

View File

@ -16,11 +16,11 @@ pip install -r requirements.txt
echo "DVC pull step"
dvc pull
# echo "coverage calculation"
# coverage run -m pytest --ignore=tests
# echo "coverage report generation"
# coverage report -m
# coverage xml
echo "coverage calculation"
coverage run -m pytest
echo "coverage report generation"
coverage report -m
coverage xml
SERVICE_NAME=$1
@ -35,7 +35,7 @@ then
echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
/usr/bin/sonar-scanner/bin/sonar-scanner -X\
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=src,vidocp \
-Dsonar.sources=src,cv_analysis \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.branch.name=${bamboo_planRepository_1_branch} \
@ -48,7 +48,7 @@ else
echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
/usr/bin/sonar-scanner/bin/sonar-scanner \
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.sources=src,vidocp \
-Dsonar.sources=src,cv_analysis \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.pullrequest.key=${bamboo_repository_pr_key} \

View File

@ -2,7 +2,7 @@
from envyaml import EnvYAML
from vidocp.locations import CONFIG_FILE
from cv_analysis.locations import CONFIG_FILE
def _get_item_and_maybe_make_dotindexable(container, item):

View File

@ -2,12 +2,12 @@ import cv2
import numpy as np
from pdf2image import pdf2image
from vidocp.utils.detection import detect_large_coherent_structures
from vidocp.utils.display import show_mpl
from vidocp.utils.draw import draw_rectangles
from vidocp.utils.post_processing import remove_included
from vidocp.utils.filters import is_large_enough, has_acceptable_format
from vidocp.utils.text import remove_primary_text_regions
from cv_analysis.utils.detection import detect_large_coherent_structures
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.draw import draw_rectangles
from cv_analysis.utils.post_processing import remove_included
from cv_analysis.utils.filters import is_large_enough, has_acceptable_format
from cv_analysis.utils.text import remove_primary_text_regions
def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6):

View File

@ -6,9 +6,9 @@ import cv2
import numpy as np
from pdf2image import pdf2image
from vidocp.utils.display import show_mpl
from vidocp.utils.draw import draw_rectangles
from vidocp.utils.post_processing import remove_overlapping, remove_included, has_no_parent
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.draw import draw_rectangles
from cv_analysis.utils.post_processing import remove_overlapping, remove_included, has_no_parent
def is_likely_segment(rect, min_area=100):

View File

@ -5,9 +5,9 @@ import numpy as np
import pdf2image
from iteration_utilities import starfilter, first
from vidocp.utils.display import show_mpl
from vidocp.utils.draw import draw_contours
from vidocp.utils.filters import is_large_enough, is_filled, is_boxy
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.draw import draw_contours
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
def is_likely_redaction(contour, hierarchy, min_area):

View File

@ -6,11 +6,11 @@ import cv2
import numpy as np
from pdf2image import pdf2image
from vidocp.utils.display import show_mpl
from vidocp.utils.draw import draw_rectangles
from vidocp.utils.post_processing import xywh_to_vecs, xywh_to_vec_rect, adjacent1d, remove_isolated
from vidocp.utils.deskew import deskew_histbased
from vidocp.layout_parsing import parse_layout
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.draw import draw_rectangles
from cv_analysis.utils.post_processing import xywh_to_vecs, xywh_to_vec_rect, adjacent1d, remove_isolated
from cv_analysis.utils.deskew import deskew_histbased
from cv_analysis.layout_parsing import parse_layout
def add_external_contours(image, img):

View File

@ -0,0 +1,4 @@
from cv_analysis.config import Config
from cv_analysis.locations import TEST_CONFIG_FILE
TEST_CONFIG = Config(TEST_CONFIG_FILE)

View File

Before

Width:  |  Height:  |  Size: 215 KiB

After

Width:  |  Height:  |  Size: 215 KiB

View File

@ -1,4 +1,4 @@
from vidocp.config import CONFIG
from cv_analysis.config import CONFIG
def test_config():

View File

@ -1,11 +1,11 @@
from os.path import join
import json
from vidocp.table_parsing import parse_table
from vidocp.locations import TEST_DATA_DIR
from vidocp.test.config import TEST_CONFIG
from vidocp.utils.test_metrics import compute_document_score
from vidocp.utils.preprocessing import open_pdf
from cv_analysis.table_parsing import parse_table
from cv_analysis.locations import TEST_DATA_DIR
from cv_analysis.test.config import TEST_CONFIG
from cv_analysis.utils.test_metrics import compute_document_score
from cv_analysis.utils.preprocessing import open_pdf
def test_table_parsing():

View File

@ -2,7 +2,7 @@ import numpy as np
from scipy.ndimage import rotate as rotate_
import cv2
from vidocp.config import CONFIG
from cv_analysis.config import CONFIG
def rotate_straight(im: np.array, skew_angle: int) -> np.array:

View File

@ -1,6 +1,6 @@
import cv2
from vidocp.utils import copy_and_normalize_channels
from cv_analysis.utils import copy_and_normalize_channels
def draw_contours(image, contours):

View File

@ -2,7 +2,7 @@
import sys
import logging
from vidocp.config import CONFIG
from cv_analysis.config import CONFIG
def get_logger():

View File

@ -3,7 +3,7 @@ import pdf2image
from PIL import Image
import cv2
from vidocp.utils.deskew import deskew
from cv_analysis.utils.deskew import deskew
def preprocess_pdf_image(page):

View File

@ -1,9 +1,9 @@
import argparse
from vidocp.table_parsing import annotate_tables_in_pdf
from vidocp.redaction_detection import annotate_redactions_in_pdf
from vidocp.layout_parsing import annotate_layout_in_pdf
from vidocp.figure_detection import detect_figures_in_pdf
from cv_analysis.table_parsing import annotate_tables_in_pdf
from cv_analysis.redaction_detection import annotate_redactions_in_pdf
from cv_analysis.layout_parsing import annotate_layout_in_pdf
from cv_analysis.figure_detection import detect_figures_in_pdf
def parse_args():

View File

@ -4,7 +4,7 @@ import json
from multiprocessing.sharedctypes import Value
import requests
from vidocp.utils.preprocessing import open_pdf
from cv_analysis.utils.preprocessing import open_pdf
def parse_args():

View File

@ -4,10 +4,10 @@ import numpy as np
import pdf2image
from PIL import Image
from vidocp.utils.deskew import deskew_histbased # , deskew_linebased
from vidocp.utils.display import show_mpl
from vidocp.utils.draw import draw_stats
from vidocp.table_parsing import parse_table
from cv_analysis.utils.deskew import deskew_histbased # , deskew_linebased
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.draw import draw_stats
from cv_analysis.table_parsing import parse_table
def parse_args():

View File

@ -3,11 +3,11 @@
from distutils.core import setup
setup(
name="vidocp",
name="cv_analysis",
version="0.0.1",
description="",
author="",
author_email="",
url="",
packages=["vidocp"],
packages=["cv_analysis"],
)

View File

@ -9,5 +9,5 @@ python3 -m pip install --upgrade pip
#pip install 'dvc[ssh]'
#dvc pull
docker build -f Dockerfile_base -t vidocp-base .
docker build -f Dockerfile -t vidocp .
docker build -f Dockerfile_base -t cv-analysis-base .
docker build -f Dockerfile -t cv-analysis .

View File

@ -8,15 +8,15 @@ from prometheus_client import Counter, Gauge
from prometheus_flask_exporter import PrometheusMetrics
from waitress import serve
from vidocp.utils import npconvert
from vidocp.utils.preprocessing import preprocess_pdf_image # TODO
from vidocp.table_parsing import parse_table # , detect_tables_in_pdf
from vidocp.redaction_detection import find_redactions # , detect_redactions_in_pdf
from vidocp.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO
from vidocp.figure_detection import detect_figures # , detect_figures_in_pdf #TODO
from vidocp.utils.logging import logger
from vidocp.utils.preprocessing import open_pdf
from vidocp.config import CONFIG
from cv_analysis.utils import npconvert
from cv_analysis.utils.preprocessing import preprocess_pdf_image # TODO
from cv_analysis.table_parsing import parse_table # , detect_tables_in_pdf
from cv_analysis.redaction_detection import find_redactions # , detect_redactions_in_pdf
from cv_analysis.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO
from cv_analysis.figure_detection import detect_figures # , detect_figures_in_pdf #TODO
from cv_analysis.utils.logging import logger
from cv_analysis.utils.preprocessing import open_pdf
from cv_analysis.config import CONFIG
def suppress_user_warnings():
@ -30,9 +30,9 @@ def main():
def run_server():
file_counter = Counter("vidocp_file_counter", "count processed files")
# page_counter = Counter("vidocp_page_counter", "count pages from processed files")
ram_metric = Gauge("vidocp_memory_usage", "Memory usage in Mb")
file_counter = Counter("cv_analysis_file_counter", "count processed files")
# page_counter = Counter("cv_analysis_page_counter", "count pages from processed files")
ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb")
def start_monitoring():
file_counter.inc()
@ -146,19 +146,19 @@ def annotate(task):
def make_art():
art = """
=================================================================================================
== ==== ============== ================= ==========================================
== ==== ============== ==== ================ ==== =========================================
== ==== ============== ==== ================ ==== =========================================
== ==== == == == ==== === ==== === ==== === === = ==== ==== === = ===
== == ========== == ==== == == = == === = == = == = == = == = ==
=== == === ===== === ==== == = == ===== =========== == ======== ==== == =======
=== == === ==== ==== ==== == = == ===== ========= == ========= === ===== =======
==== ==== === ===== ==== == = == = == ======== = == ======= = == = == =======
===== ===== == == ==== ==== === ========= == ======== ==== === =======
=================================================================================================
art = r"""
__ __
| \ | \
_______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______
/ \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \
| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$
| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \
| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\
\$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$
\$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$
| \__| $$
\$$ $$
\$$$$$$
"""
return art

View File

@ -1,4 +0,0 @@
from vidocp.config import Config
from vidocp.locations import TEST_CONFIG_FILE
TEST_CONFIG = Config(TEST_CONFIG_FILE)