From 8730b34018ac240357ec1ed28acd7a46097f4d42 Mon Sep 17 00:00:00 2001 From: Isaac Riley Date: Wed, 23 Mar 2022 13:46:57 +0100 Subject: [PATCH] change name from vidocp to cv-analysis --- .coveragerc | 2 +- .gitignore | 8 +-- Dockerfile | 4 +- README.md | 12 ++--- .../src/main/java/buildjob/PlanSpec.java | 6 +-- .../src/main/resources/scripts/sonar-scan.sh | 4 +- .../target/classes/buildjob/PlanSpec.class | Bin 11149 -> 11179 bytes .../target/classes/scripts/sonar-scan.sh | 14 ++--- {vidocp => cv_analysis}/__init__.py | 0 {vidocp => cv_analysis}/config.py | 2 +- {vidocp => cv_analysis}/figure_detection.py | 12 ++--- {vidocp => cv_analysis}/layout_detection.py | 0 {vidocp => cv_analysis}/layout_parsing.py | 6 +-- {vidocp => cv_analysis}/locations.py | 0 .../redaction_detection.py | 6 +-- {vidocp => cv_analysis}/table_parsing.py | 10 ++-- {vidocp => cv_analysis}/test/__init__.py | 0 cv_analysis/test/config.py | 4 ++ {vidocp => cv_analysis}/test/config.yaml | 0 .../test/test_data/table.jpg | Bin .../test/test_data/table.json | 0 .../test/unit_tests/config_test.py | 2 +- .../test/unit_tests/table_test.py | 10 ++-- {vidocp => cv_analysis}/utils/__init__.py | 0 {vidocp => cv_analysis}/utils/deskew.py | 2 +- {vidocp => cv_analysis}/utils/detection.py | 0 {vidocp => cv_analysis}/utils/display.py | 0 {vidocp => cv_analysis}/utils/draw.py | 2 +- {vidocp => cv_analysis}/utils/filters.py | 0 {vidocp => cv_analysis}/utils/logging.py | 2 +- .../utils/post_processing.py | 0 .../utils/preprocessing.py | 2 +- {vidocp => cv_analysis}/utils/test_metrics.py | 0 {vidocp => cv_analysis}/utils/text.py | 0 {vidocp => cv_analysis}/utils/utils.py | 0 scripts/annotate.py | 8 +-- scripts/client_mock.py | 2 +- scripts/deskew_demo.py | 8 +-- setup.py | 4 +- setup/docker.sh | 4 +- src/run_service.py | 50 +++++++++--------- vidocp/test/config.py | 4 -- 42 files changed, 95 insertions(+), 95 deletions(-) rename {vidocp => cv_analysis}/__init__.py (100%) rename {vidocp => cv_analysis}/config.py (95%) rename {vidocp => cv_analysis}/figure_detection.py (70%) rename {vidocp => cv_analysis}/layout_detection.py (100%) rename {vidocp => cv_analysis}/layout_parsing.py (91%) rename {vidocp => cv_analysis}/locations.py (100%) rename {vidocp => cv_analysis}/redaction_detection.py (88%) rename {vidocp => cv_analysis}/table_parsing.py (94%) rename {vidocp => cv_analysis}/test/__init__.py (100%) create mode 100644 cv_analysis/test/config.py rename {vidocp => cv_analysis}/test/config.yaml (100%) rename {vidocp => cv_analysis}/test/test_data/table.jpg (100%) rename {vidocp => cv_analysis}/test/test_data/table.json (100%) rename {vidocp => cv_analysis}/test/unit_tests/config_test.py (63%) rename {vidocp => cv_analysis}/test/unit_tests/table_test.py (64%) rename {vidocp => cv_analysis}/utils/__init__.py (100%) rename {vidocp => cv_analysis}/utils/deskew.py (98%) rename {vidocp => cv_analysis}/utils/detection.py (100%) rename {vidocp => cv_analysis}/utils/display.py (100%) rename {vidocp => cv_analysis}/utils/draw.py (92%) rename {vidocp => cv_analysis}/utils/filters.py (100%) rename {vidocp => cv_analysis}/utils/logging.py (93%) rename {vidocp => cv_analysis}/utils/post_processing.py (100%) rename {vidocp => cv_analysis}/utils/preprocessing.py (95%) rename {vidocp => cv_analysis}/utils/test_metrics.py (100%) rename {vidocp => cv_analysis}/utils/text.py (100%) rename {vidocp => cv_analysis}/utils/utils.py (100%) delete mode 100644 vidocp/test/config.py diff --git a/.coveragerc b/.coveragerc index 792a045..dc9f912 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,7 +9,7 @@ omit = */setup.py */build_venv/* source = - vidocp + cv-analysis relative_files = True data_file = .coverage diff --git a/.gitignore b/.gitignore index 87a1ace..cdfefcc 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,7 @@ build_venv/ /table_parsing.egg-info /target/ /tests/ -/vidocp.egg-info/dependency_links.txt -/vidocp.egg-info/PKG-INFO -/vidocp.egg-info/SOURCES.txt -/vidocp.egg-info/top_level.txt +/cv-analysis.egg-info/dependency_links.txt +/cv-analysis.egg-info/PKG-INFO +/cv-analysis.egg-info/SOURCES.txt +/cv-analysis.egg-info/top_level.txt diff --git a/Dockerfile b/Dockerfile index 98abde6..98a29a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,12 @@ ARG BASE_ROOT="nexus.iqser.com:5001/red/" ARG VERSION_TAG=latest -FROM ${BASE_ROOT}vidocp-base:${VERSION_TAG} +FROM ${BASE_ROOT}cv-analysis-base:${VERSION_TAG} WORKDIR /app/service COPY ./src ./src -COPY vidocp ./vidocp +COPY cv-analysis ./cv-analysis RUN python3 -m pip install --upgrade pip RUN python3 -m pip install -e . diff --git a/README.md b/README.md index 1654cc1..f2c8b9c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Vidocp — Visual Document Parsing +# cv-analysis — Visual Document Parsing This repository implements computer vision based approaches for detecting and parsing visual features such as tables or previous redactions in documents. @@ -6,8 +6,8 @@ previous redactions in documents. ## Installation ```bash -git clone ssh://git@git.iqser.com:2222/rr/vidocp.git -cd vidocp +git clone ssh://git@git.iqser.com:2222/rr/cv-analysis.git +cd cv-analysis python -m venv env source env/bin/activate @@ -30,7 +30,7 @@ the specific task. The below snippet shows hot to find the outlines of previous redactions. ```python -from vidocp.redaction_detection import find_redactions +from cv_analysis.redaction_detection import find_redactions import pdf2image import numpy as np @@ -109,7 +109,7 @@ bash setup/docker.sh Build head image ```bash -docker build -f Dockerfile -t vidocp . --build-arg BASE_ROOT="" +docker build -f Dockerfile -t cv-analysis . --build-arg BASE_ROOT="" ``` ### Usage (service) @@ -117,7 +117,7 @@ docker build -f Dockerfile -t vidocp . --build-arg BASE_ROOT="" Shell 1 ```bash -docker run --rm --net=host --rm vidocp +docker run --rm --net=host --rm cv-analysis ``` Shell 2 diff --git a/bamboo-specs/src/main/java/buildjob/PlanSpec.java b/bamboo-specs/src/main/java/buildjob/PlanSpec.java index 3992c89..ec8905b 100644 --- a/bamboo-specs/src/main/java/buildjob/PlanSpec.java +++ b/bamboo-specs/src/main/java/buildjob/PlanSpec.java @@ -33,8 +33,8 @@ import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location; @BambooSpec public class PlanSpec { - private static final String SERVICE_NAME = "vidocp"; - private static final String SERVICE_NAME_BASE = "vidocp-base"; + private static final String SERVICE_NAME = "cv-analysis"; + private static final String SERVICE_NAME_BASE = "cv-analysis-base"; private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_",""); @@ -72,7 +72,7 @@ public class PlanSpec { return new Plan( project(), SERVICE_NAME, new BambooKey(SERVICE_KEY)) - .description("Docker build for vidocp.") + .description("Docker build for cv-analysis.") // .variables() .stages(new Stage("Build Stage") .jobs( diff --git a/bamboo-specs/src/main/resources/scripts/sonar-scan.sh b/bamboo-specs/src/main/resources/scripts/sonar-scan.sh index d53c736..834c288 100755 --- a/bamboo-specs/src/main/resources/scripts/sonar-scan.sh +++ b/bamboo-specs/src/main/resources/scripts/sonar-scan.sh @@ -35,7 +35,7 @@ then echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}" /usr/bin/sonar-scanner/bin/sonar-scanner -X\ -Dsonar.projectKey=RED_$SERVICE_NAME \ - -Dsonar.sources=src,vidocp \ + -Dsonar.sources=src,cv_analysis \ -Dsonar.host.url=https://sonarqube.iqser.com \ -Dsonar.login=${bamboo_sonarqube_api_token_secret} \ -Dsonar.branch.name=${bamboo_planRepository_1_branch} \ @@ -48,7 +48,7 @@ else echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}" /usr/bin/sonar-scanner/bin/sonar-scanner \ -Dsonar.projectKey=RED_$SERVICE_NAME \ - -Dsonar.sources=src,vidocp \ + -Dsonar.sources=src,cv_analysis \ -Dsonar.host.url=https://sonarqube.iqser.com \ -Dsonar.login=${bamboo_sonarqube_api_token_secret} \ -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \ diff --git a/bamboo-specs/target/classes/buildjob/PlanSpec.class b/bamboo-specs/target/classes/buildjob/PlanSpec.class index 802ed7abf1190cb0e24a4b6325e37f4c821d94bb..a55fd3025a515bcd36cb9445e1237e250348d565 100644 GIT binary patch delta 148 zcmeATUmZT7z>qt+OgAwvF{iRPvzU=VFxWLH%+uL5-p|q3HQvcF*p-8Uhmk=5NzumX zhP<+}F8RsXsYMD&rI|S?3TgR83P>t8Kj6(|mykgcL6Hp?mE{x^2ntfrM-gt2 np.array: diff --git a/vidocp/utils/detection.py b/cv_analysis/utils/detection.py similarity index 100% rename from vidocp/utils/detection.py rename to cv_analysis/utils/detection.py diff --git a/vidocp/utils/display.py b/cv_analysis/utils/display.py similarity index 100% rename from vidocp/utils/display.py rename to cv_analysis/utils/display.py diff --git a/vidocp/utils/draw.py b/cv_analysis/utils/draw.py similarity index 92% rename from vidocp/utils/draw.py rename to cv_analysis/utils/draw.py index 2f7ef06..0031f62 100644 --- a/vidocp/utils/draw.py +++ b/cv_analysis/utils/draw.py @@ -1,6 +1,6 @@ import cv2 -from vidocp.utils import copy_and_normalize_channels +from cv_analysis.utils import copy_and_normalize_channels def draw_contours(image, contours): diff --git a/vidocp/utils/filters.py b/cv_analysis/utils/filters.py similarity index 100% rename from vidocp/utils/filters.py rename to cv_analysis/utils/filters.py diff --git a/vidocp/utils/logging.py b/cv_analysis/utils/logging.py similarity index 93% rename from vidocp/utils/logging.py rename to cv_analysis/utils/logging.py index 729fdb4..792a3fa 100644 --- a/vidocp/utils/logging.py +++ b/cv_analysis/utils/logging.py @@ -2,7 +2,7 @@ import sys import logging -from vidocp.config import CONFIG +from cv_analysis.config import CONFIG def get_logger(): diff --git a/vidocp/utils/post_processing.py b/cv_analysis/utils/post_processing.py similarity index 100% rename from vidocp/utils/post_processing.py rename to cv_analysis/utils/post_processing.py diff --git a/vidocp/utils/preprocessing.py b/cv_analysis/utils/preprocessing.py similarity index 95% rename from vidocp/utils/preprocessing.py rename to cv_analysis/utils/preprocessing.py index 5f91393..70bab5e 100644 --- a/vidocp/utils/preprocessing.py +++ b/cv_analysis/utils/preprocessing.py @@ -3,7 +3,7 @@ import pdf2image from PIL import Image import cv2 -from vidocp.utils.deskew import deskew +from cv_analysis.utils.deskew import deskew def preprocess_pdf_image(page): diff --git a/vidocp/utils/test_metrics.py b/cv_analysis/utils/test_metrics.py similarity index 100% rename from vidocp/utils/test_metrics.py rename to cv_analysis/utils/test_metrics.py diff --git a/vidocp/utils/text.py b/cv_analysis/utils/text.py similarity index 100% rename from vidocp/utils/text.py rename to cv_analysis/utils/text.py diff --git a/vidocp/utils/utils.py b/cv_analysis/utils/utils.py similarity index 100% rename from vidocp/utils/utils.py rename to cv_analysis/utils/utils.py diff --git a/scripts/annotate.py b/scripts/annotate.py index 9ef1bce..306e60c 100644 --- a/scripts/annotate.py +++ b/scripts/annotate.py @@ -1,9 +1,9 @@ import argparse -from vidocp.table_parsing import annotate_tables_in_pdf -from vidocp.redaction_detection import annotate_redactions_in_pdf -from vidocp.layout_parsing import annotate_layout_in_pdf -from vidocp.figure_detection import detect_figures_in_pdf +from cv_analysis.table_parsing import annotate_tables_in_pdf +from cv_analysis.redaction_detection import annotate_redactions_in_pdf +from cv_analysis.layout_parsing import annotate_layout_in_pdf +from cv_analysis.figure_detection import detect_figures_in_pdf def parse_args(): diff --git a/scripts/client_mock.py b/scripts/client_mock.py index 0caf0a4..ddf8406 100644 --- a/scripts/client_mock.py +++ b/scripts/client_mock.py @@ -4,7 +4,7 @@ import json from multiprocessing.sharedctypes import Value import requests -from vidocp.utils.preprocessing import open_pdf +from cv_analysis.utils.preprocessing import open_pdf def parse_args(): diff --git a/scripts/deskew_demo.py b/scripts/deskew_demo.py index e97df9d..4a4032c 100644 --- a/scripts/deskew_demo.py +++ b/scripts/deskew_demo.py @@ -4,10 +4,10 @@ import numpy as np import pdf2image from PIL import Image -from vidocp.utils.deskew import deskew_histbased # , deskew_linebased -from vidocp.utils.display import show_mpl -from vidocp.utils.draw import draw_stats -from vidocp.table_parsing import parse_table +from cv_analysis.utils.deskew import deskew_histbased # , deskew_linebased +from cv_analysis.utils.display import show_mpl +from cv_analysis.utils.draw import draw_stats +from cv_analysis.table_parsing import parse_table def parse_args(): diff --git a/setup.py b/setup.py index 9fc73a8..28f6e1a 100644 --- a/setup.py +++ b/setup.py @@ -3,11 +3,11 @@ from distutils.core import setup setup( - name="vidocp", + name="cv_analysis", version="0.0.1", description="", author="", author_email="", url="", - packages=["vidocp"], + packages=["cv_analysis"], ) diff --git a/setup/docker.sh b/setup/docker.sh index d46db68..b7da059 100644 --- a/setup/docker.sh +++ b/setup/docker.sh @@ -9,5 +9,5 @@ python3 -m pip install --upgrade pip #pip install 'dvc[ssh]' #dvc pull -docker build -f Dockerfile_base -t vidocp-base . -docker build -f Dockerfile -t vidocp . \ No newline at end of file +docker build -f Dockerfile_base -t cv-analysis-base . +docker build -f Dockerfile -t cv-analysis . \ No newline at end of file diff --git a/src/run_service.py b/src/run_service.py index e8c80ed..1b76edb 100644 --- a/src/run_service.py +++ b/src/run_service.py @@ -8,15 +8,15 @@ from prometheus_client import Counter, Gauge from prometheus_flask_exporter import PrometheusMetrics from waitress import serve -from vidocp.utils import npconvert -from vidocp.utils.preprocessing import preprocess_pdf_image # TODO -from vidocp.table_parsing import parse_table # , detect_tables_in_pdf -from vidocp.redaction_detection import find_redactions # , detect_redactions_in_pdf -from vidocp.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO -from vidocp.figure_detection import detect_figures # , detect_figures_in_pdf #TODO -from vidocp.utils.logging import logger -from vidocp.utils.preprocessing import open_pdf -from vidocp.config import CONFIG +from cv_analysis.utils import npconvert +from cv_analysis.utils.preprocessing import preprocess_pdf_image # TODO +from cv_analysis.table_parsing import parse_table # , detect_tables_in_pdf +from cv_analysis.redaction_detection import find_redactions # , detect_redactions_in_pdf +from cv_analysis.layout_parsing import parse_layout # , detect_layout_in_pdf #TODO +from cv_analysis.figure_detection import detect_figures # , detect_figures_in_pdf #TODO +from cv_analysis.utils.logging import logger +from cv_analysis.utils.preprocessing import open_pdf +from cv_analysis.config import CONFIG def suppress_user_warnings(): @@ -30,9 +30,9 @@ def main(): def run_server(): - file_counter = Counter("vidocp_file_counter", "count processed files") - # page_counter = Counter("vidocp_page_counter", "count pages from processed files") - ram_metric = Gauge("vidocp_memory_usage", "Memory usage in Mb") + file_counter = Counter("cv_analysis_file_counter", "count processed files") + # page_counter = Counter("cv-analysis_page_counter", "count pages from processed files") + ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb") def start_monitoring(): file_counter.inc() @@ -146,19 +146,19 @@ def annotate(task): def make_art(): - art = """ - - ================================================================================================= - == ==== ============== ================= ========================================== - == ==== ============== ==== ================ ==== ========================================= - == ==== ============== ==== ================ ==== ========================================= - == ==== == == == ==== === ==== === ==== === === = ==== ==== === = === - == == ========== == ==== == == = == === = == = == = == = == = == - === == === ===== === ==== == = == ===== =========== == ======== ==== == ======= - === == === ==== ==== ==== == = == ===== ========= == ========= === ===== ======= - ==== ==== === ===== ==== == = == = == ======== = == ======= = == = == ======= - ===== ===== == == ==== ==== === ========= == ======== ==== === ======= - ================================================================================================= + art = r""" + __ __ + | \ | \ + _______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______ + / \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \ +| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$ +| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \ +| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\ + \$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$ + \$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$ + | \__| $$ + \$$ $$ + \$$$$$$ """ return art diff --git a/vidocp/test/config.py b/vidocp/test/config.py deleted file mode 100644 index 2fafa83..0000000 --- a/vidocp/test/config.py +++ /dev/null @@ -1,4 +0,0 @@ -from vidocp.config import Config -from vidocp.locations import TEST_CONFIG_FILE - -TEST_CONFIG = Config(TEST_CONFIG_FILE)