diff --git a/.coveragerc b/.coveragerc index 41e9e56..a11f805 100644 --- a/.coveragerc +++ b/.coveragerc @@ -8,6 +8,7 @@ omit = */__init__.py */setup.py */build_venv/* + */incl/* source = cv_analysis relative_files = True @@ -36,6 +37,8 @@ omit = */__init__.py */setup.py */build_venv/* + */src/* + */incl/* ignore_errors = True diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8ff9112 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "incl/pyinfra"] + path = incl/pyinfra + url = ssh://git@git.iqser.com:2222/rr/pyinfra.git diff --git a/Dockerfile_base b/Dockerfile_base index b564b73..0838921 100644 --- a/Dockerfile_base +++ b/Dockerfile_base @@ -13,6 +13,8 @@ COPY . ./ # Install dependencies. RUN python3 -m pip install -r requirements.txt +RUN python3 -m pip install -r incl/pyinfra/requirements.txt +RUN python3 -m pip install -e incl/pyinfra # Make a new container and copy all relevant files over to filter out temporary files # produced during setup to reduce the final container's size. diff --git a/bamboo-specs/src/main/resources/scripts/docker-build.sh b/bamboo-specs/src/main/resources/scripts/docker-build.sh index f17638f..42874f6 100755 --- a/bamboo-specs/src/main/resources/scripts/docker-build.sh +++ b/bamboo-specs/src/main/resources/scripts/docker-build.sh @@ -3,7 +3,7 @@ set -e SERVICE_NAME=$1 SERVICE_NAME_BASE=$2 - +# TODO version tag on master push python3 -m venv build_venv source build_venv/bin/activate python3 -m pip install --upgrade pip diff --git a/bamboo-specs/src/main/resources/scripts/sonar-scan.sh b/bamboo-specs/src/main/resources/scripts/sonar-scan.sh index 834c288..db33c81 100755 --- a/bamboo-specs/src/main/resources/scripts/sonar-scan.sh +++ b/bamboo-specs/src/main/resources/scripts/sonar-scan.sh @@ -7,11 +7,11 @@ python3 -m venv build_venv source build_venv/bin/activate python3 -m pip install --upgrade pip -echo "dev setup for unit test and coverage" - pip install -e . pip install -r requirements.txt +pip install -e incl/pyinfra +pip install -r incl/pyinfra/requirements.txt echo "DVC pull step" dvc pull @@ -28,7 +28,7 @@ echo "dependency-check:aggregate" mkdir -p reports dependency-check --enableExperimental -f JSON -f HTML -f XML \ --disableAssembly -s . -o reports --project $SERVICE_NAME --exclude ".git/**" --exclude "venv/**" \ - --exclude "build_venv/**" --exclude "**/__pycache__/**" + --exclude "build_venv/**" --exclude "**/__pycache__/**" --exclude "bamboo-specs/**" if [[ -z "${bamboo_repository_pr_key}" ]] then diff --git a/config.yaml b/config.yaml index c739249..7b939d1 100644 --- a/config.yaml +++ b/config.yaml @@ -1,27 +1,12 @@ -device: cpu - service: logging_level: $LOGGING_LEVEL_ROOT|INFO # Logging level for log file messages - logfile_path: $LOGFILE_PATH|null # Overwrites the default path for the service logfile (image_service/log.log) monitoring_enabled: $MONITORING_ENABLED|True # if app is doing monitoring or not + logfile_path: $LOGFILE_PATH|null # Overwrites the default path for the service logfile (image_service/log.log) webserver: host: $SERVER_HOST|"127.0.0.1" # webserver address port: $SERVER_PORT|5000 # webserver port - mode: $SERVER_MODE|production # webserver mode: {development, production} - -deskew: - function: identity # function to use: {hist: deskew_histbased, identity: } - preprocess: True - max_abs_angle: 1.5 - delta: 0.1 - test_delta: 0.15 - mode: nearest - verbose: False - filter_strength_h: 3 - -test_dummy: test_dummy visual_logging: - level: $LOGGING_LEVEL_ROOT|INFO # NOTHNG > INFO > DEBUG > ALL + level: $LOGGING_LEVEL_ROOT|INFO # NOTHING > INFO > DEBUG > ALL output_folder: /tmp/debug/ \ No newline at end of file diff --git a/cv_analysis/locations.py b/cv_analysis/locations.py index 07e15f6..5e7bf76 100644 --- a/cv_analysis/locations.py +++ b/cv_analysis/locations.py @@ -18,6 +18,5 @@ HASHED_PDFS_FOR_TESTING = path.join(PDF_FOR_TESTING, "hashed") METADATA_TESTFILES = path.join(DVC_DATA_DIR, "metadata_testing_files.csv") -TEST_DIR = path.join(MODULE_DIR, "test") -TEST_DATA_DIR = path.join(MODULE_DIR, "test", "test_data") -TEST_CONFIG_FILE = path.join(TEST_DIR, "config.yaml") +TEST_DIR = path.join(PACKAGE_ROOT_DIR, "test") +TEST_DATA_DIR = path.join(TEST_DIR, "test_data") diff --git a/cv_analysis/pyinfra_compat.py b/cv_analysis/pyinfra_compat.py index 96013e0..560515a 100644 --- a/cv_analysis/pyinfra_compat.py +++ b/cv_analysis/pyinfra_compat.py @@ -1,33 +1,49 @@ -from cv_analysis.table_parsing import parse_tables -from cv_analysis.redaction_detection import find_redactions -from cv_analysis.layout_parsing import parse_layout +import gzip +from typing import Callable + +from funcy import lmap +from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic + from cv_analysis.figure_detection import detect_figures +from cv_analysis.layout_parsing import parse_layout +from cv_analysis.redaction_detection import find_redactions +from cv_analysis.table_parsing import parse_tables from cv_analysis.utils.preprocessing import open_img_from_bytes -task_dict = { - "table": parse_tables, - "figure": detect_figures, - "layout": parse_layout, - "redaction": find_redactions, -} +def make_streamable_analysis_fn(analysis_fn: Callable): + """Makes an analysis function streamable for pyinfra server logic. The wrapped function then + works with data and metadata and returns a tuple or generator of tuples with data and metadata. + For more information about the server logic, see the PyInfra documentation. + + Args: + analysis_fn: cv-analysis function + + Returns: + wrapped function + """ + + def analyse(data, metadata: dict): + def format_results(): + return { + **metadata, + "pageWidth": image.shape[1], + "pageHeight": image.shape[0], + "cells": results, + } + + image = open_img_from_bytes(gzip.decompress(data)) + results = lmap(lambda x: x.json_xywh(), analysis_fn(image)) + results_metadata = format_results() if results else {} + return b"", results_metadata + + return make_streamable_and_wrap_in_packing_logic(analyse, batched=False) -def analyze_bytes(img_bytes, page_num, task="table"): - analysis_function = task_dict[task] - page = open_img_from_bytes(img_bytes) - cells = list(map(lambda x: x.json_xywh(), analysis_function(page))) - page_dict = { - "page": page_num, - "pageWidth": page.shape[1], - "pageHeight": page.shape[0], - "cells": cells - } - return page_dict - - -def analyze_bytes_list(img_bytes_list, task="table"): - result = [] - for i, img_bytes in enumerate(img_bytes_list): - result.append(analyze_bytes(img_bytes, i, task=task)) - return result \ No newline at end of file +def get_analysis_fn(operation): + return { + "table_parsing": parse_tables, + "layout_parsing": parse_layout, + "figure_detection": detect_figures, + "redaction_detection": find_redactions, + }[operation] diff --git a/cv_analysis/test/config.py b/cv_analysis/test/config.py deleted file mode 100644 index a0aec7b..0000000 --- a/cv_analysis/test/config.py +++ /dev/null @@ -1,4 +0,0 @@ -from cv_analysis.config import Config -from cv_analysis.locations import TEST_CONFIG_FILE - -TEST_CONFIG = Config(TEST_CONFIG_FILE) diff --git a/cv_analysis/test/config.yaml b/cv_analysis/test/config.yaml deleted file mode 100644 index d34c023..0000000 --- a/cv_analysis/test/config.yaml +++ /dev/null @@ -1,2 +0,0 @@ -table_score_threshold: 0.95 - diff --git a/cv_analysis/test/test_data/ref.json b/cv_analysis/test/test_data/ref.json deleted file mode 100644 index 222d778..0000000 --- a/cv_analysis/test/test_data/ref.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "images": [ - { - "name": "test1.png", - "source_document": "Amended Residue analytical method for the determ.pdf", - "page": 7 - }, - { - "name": "test2.png", - "source_document": "Amended Residue analytical method for the determ.pdf", - "page": 39 - }, - { - "name": "test3.png", - "source_document": "VV-857853.pdf", - "page": 8 - }, - { - "name": "test4.png", - "source_document": "Sulphur_RAR_09_Volume_3CA_B-7_2021-04-09.pdf", - "page": 25 - }, - { - "name": "test5.png", - "source_document": "Sulphur_RAR_09_Volume_3CA_B-7_2021-04-09.pdf", - "page": 35 - }, - { - "name": "test6.png", - "source_document": "VV-128279.pdf", - "page": 49 - }, - { - "name": "test7.png", - "source_document": "VV-376573.pdf", - "page": 86 - }, - { - "name": "test8.png", - "source_document": "VV-377325.pdf", - "page": 218 - }, - { - "name": "test9.png", - "source_document": "VV-857853.pdf", - "page": 10 - }, - { - "name": "test10.png", - "source_document": "VV-334103.pdf", - "page": 28 - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test1.json b/cv_analysis/test/test_data/test1.json deleted file mode 100644 index b2fdf55..0000000 --- a/cv_analysis/test/test_data/test1.json +++ /dev/null @@ -1,191 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2481, - "pageHeight": 3509, - "cells": [ - { - "x": 604, - "y": 400, - "width": 399, - "height": 142 - }, - { - "x": 1006, - "y": 400, - "width": 49, - "height": 142 - }, - { - "x": 1058, - "y": 400, - "width": 1215, - "height": 142 - }, - { - "x": 604, - "y": 545, - "width": 399, - "height": 83 - }, - { - "x": 1006, - "y": 545, - "width": 49, - "height": 83 - }, - { - "x": 1058, - "y": 545, - "width": 1215, - "height": 83 - }, - { - "x": 604, - "y": 631, - "width": 399, - "height": 84 - }, - { - "x": 1006, - "y": 631, - "width": 49, - "height": 84 - }, - { - "x": 1058, - "y": 631, - "width": 1215, - "height": 84 - }, - { - "x": 604, - "y": 718, - "width": 399, - "height": 84 - }, - { - "x": 1006, - "y": 718, - "width": 49, - "height": 84 - }, - { - "x": 1058, - "y": 718, - "width": 1215, - "height": 84 - }, - { - "x": 604, - "y": 805, - "width": 399, - "height": 804 - }, - { - "x": 1006, - "y": 805, - "width": 49, - "height": 804 - }, - { - "x": 1058, - "y": 805, - "width": 1215, - "height": 804 - }, - { - "x": 604, - "y": 1724, - "width": 399, - "height": 84 - }, - { - "x": 1006, - "y": 1724, - "width": 49, - "height": 84 - }, - { - "x": 1058, - "y": 1724, - "width": 1215, - "height": 84 - }, - { - "x": 604, - "y": 1811, - "width": 399, - "height": 83 - }, - { - "x": 1006, - "y": 1811, - "width": 49, - "height": 83 - }, - { - "x": 1058, - "y": 1811, - "width": 1215, - "height": 83 - }, - { - "x": 604, - "y": 1897, - "width": 399, - "height": 84 - }, - { - "x": 1006, - "y": 1897, - "width": 49, - "height": 84 - }, - { - "x": 1058, - "y": 1897, - "width": 1215, - "height": 84 - }, - { - "x": 604, - "y": 1984, - "width": 399, - "height": 84 - }, - { - "x": 1006, - "y": 1984, - "width": 49, - "height": 84 - }, - { - "x": 1058, - "y": 1984, - "width": 1215, - "height": 84 - }, - { - "x": 604, - "y": 2071, - "width": 399, - "height": 813 - }, - { - "x": 1006, - "y": 2071, - "width": 49, - "height": 813 - }, - { - "x": 1058, - "y": 2071, - "width": 1215, - "height": 813 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test1.png b/cv_analysis/test/test_data/test1.png deleted file mode 100644 index a1f0d3b..0000000 Binary files a/cv_analysis/test/test_data/test1.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test10.json b/cv_analysis/test/test_data/test10.json deleted file mode 100644 index d52d3e7..0000000 --- a/cv_analysis/test/test_data/test10.json +++ /dev/null @@ -1,851 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2480, - "pageHeight": 3509, - "cells": [ - { - "x": 1828, - "y": 1667, - "width": 382, - "height": 55 - }, - { - "x": 1477, - "y": 1670, - "width": 349, - "height": 55 - }, - { - "x": 1126, - "y": 1673, - "width": 349, - "height": 54 - }, - { - "x": 776, - "y": 1676, - "width": 348, - "height": 54 - }, - { - "x": 425, - "y": 1678, - "width": 348, - "height": 54 - }, - { - "x": 1828, - "y": 1722, - "width": 382, - "height": 55 - }, - { - "x": 1477, - "y": 1725, - "width": 349, - "height": 55 - }, - { - "x": 1126, - "y": 1728, - "width": 349, - "height": 54 - }, - { - "x": 776, - "y": 1730, - "width": 348, - "height": 55 - }, - { - "x": 425, - "y": 1733, - "width": 349, - "height": 54 - }, - { - "x": 1828, - "y": 1777, - "width": 382, - "height": 54 - }, - { - "x": 1478, - "y": 1780, - "width": 348, - "height": 54 - }, - { - "x": 1126, - "y": 1783, - "width": 349, - "height": 53 - }, - { - "x": 776, - "y": 1786, - "width": 348, - "height": 53 - }, - { - "x": 426, - "y": 1788, - "width": 348, - "height": 53 - }, - { - "x": 1828, - "y": 1832, - "width": 382, - "height": 54 - }, - { - "x": 1478, - "y": 1835, - "width": 348, - "height": 54 - }, - { - "x": 1126, - "y": 1837, - "width": 349, - "height": 55 - }, - { - "x": 776, - "y": 1840, - "width": 348, - "height": 54 - }, - { - "x": 426, - "y": 1843, - "width": 348, - "height": 53 - }, - { - "x": 1829, - "y": 1887, - "width": 381, - "height": 54 - }, - { - "x": 1478, - "y": 1890, - "width": 348, - "height": 53 - }, - { - "x": 1126, - "y": 1892, - "width": 349, - "height": 54 - }, - { - "x": 776, - "y": 1895, - "width": 348, - "height": 53 - }, - { - "x": 426, - "y": 1898, - "width": 348, - "height": 53 - }, - { - "x": 1829, - "y": 1941, - "width": 381, - "height": 54 - }, - { - "x": 1478, - "y": 1944, - "width": 348, - "height": 54 - }, - { - "x": 1126, - "y": 1947, - "width": 349, - "height": 53 - }, - { - "x": 776, - "y": 1949, - "width": 348, - "height": 54 - }, - { - "x": 426, - "y": 1952, - "width": 348, - "height": 53 - }, - { - "x": 1829, - "y": 1995, - "width": 381, - "height": 55 - }, - { - "x": 1478, - "y": 1999, - "width": 348, - "height": 53 - }, - { - "x": 1127, - "y": 2001, - "width": 348, - "height": 54 - }, - { - "x": 776, - "y": 2004, - "width": 348, - "height": 53 - }, - { - "x": 426, - "y": 2006, - "width": 348, - "height": 54 - }, - { - "x": 1829, - "y": 2050, - "width": 382, - "height": 54 - }, - { - "x": 1478, - "y": 2053, - "width": 348, - "height": 54 - }, - { - "x": 1127, - "y": 2056, - "width": 349, - "height": 54 - }, - { - "x": 776, - "y": 2058, - "width": 348, - "height": 54 - }, - { - "x": 426, - "y": 2061, - "width": 348, - "height": 54 - }, - { - "x": 1829, - "y": 2105, - "width": 382, - "height": 54 - }, - { - "x": 1478, - "y": 2108, - "width": 349, - "height": 54 - }, - { - "x": 1127, - "y": 2110, - "width": 349, - "height": 54 - }, - { - "x": 776, - "y": 2113, - "width": 348, - "height": 54 - }, - { - "x": 426, - "y": 2116, - "width": 348, - "height": 54 - }, - { - "x": 1829, - "y": 2159, - "width": 382, - "height": 55 - }, - { - "x": 1478, - "y": 2164, - "width": 349, - "height": 52 - }, - { - "x": 1127, - "y": 2166, - "width": 349, - "height": 53 - }, - { - "x": 776, - "y": 2169, - "width": 348, - "height": 53 - }, - { - "x": 426, - "y": 2172, - "width": 348, - "height": 52 - }, - { - "x": 1829, - "y": 2215, - "width": 382, - "height": 55 - }, - { - "x": 1478, - "y": 2218, - "width": 349, - "height": 54 - }, - { - "x": 1127, - "y": 2221, - "width": 349, - "height": 54 - }, - { - "x": 776, - "y": 2224, - "width": 348, - "height": 53 - }, - { - "x": 426, - "y": 2226, - "width": 348, - "height": 54 - }, - { - "x": 1830, - "y": 2271, - "width": 381, - "height": 50 - }, - { - "x": 1479, - "y": 2274, - "width": 348, - "height": 49 - }, - { - "x": 1127, - "y": 2276, - "width": 349, - "height": 50 - }, - { - "x": 776, - "y": 2279, - "width": 348, - "height": 49 - }, - { - "x": 426, - "y": 2282, - "width": 348, - "height": 49 - }, - { - "x": 1830, - "y": 2322, - "width": 383, - "height": 53 - }, - { - "x": 1480, - "y": 2325, - "width": 348, - "height": 53 - }, - { - "x": 1127, - "y": 2328, - "width": 350, - "height": 52 - }, - { - "x": 776, - "y": 2330, - "width": 348, - "height": 53 - }, - { - "x": 426, - "y": 2333, - "width": 348, - "height": 52 - }, - { - "x": 1831, - "y": 2377, - "width": 382, - "height": 52 - }, - { - "x": 1480, - "y": 2380, - "width": 348, - "height": 52 - }, - { - "x": 1127, - "y": 2382, - "width": 350, - "height": 52 - }, - { - "x": 776, - "y": 2385, - "width": 348, - "height": 51 - }, - { - "x": 426, - "y": 2388, - "width": 348, - "height": 51 - }, - { - "x": 1831, - "y": 2430, - "width": 382, - "height": 53 - }, - { - "x": 1480, - "y": 2433, - "width": 348, - "height": 53 - }, - { - "x": 1127, - "y": 2436, - "width": 350, - "height": 53 - }, - { - "x": 777, - "y": 2438, - "width": 348, - "height": 53 - }, - { - "x": 426, - "y": 2441, - "width": 348, - "height": 52 - }, - { - "x": 1831, - "y": 2485, - "width": 383, - "height": 53 - }, - { - "x": 1480, - "y": 2488, - "width": 348, - "height": 52 - }, - { - "x": 1127, - "y": 2490, - "width": 350, - "height": 53 - }, - { - "x": 777, - "y": 2493, - "width": 348, - "height": 52 - }, - { - "x": 427, - "y": 2495, - "width": 348, - "height": 53 - }, - { - "x": 1831, - "y": 2539, - "width": 383, - "height": 53 - }, - { - "x": 1480, - "y": 2542, - "width": 349, - "height": 53 - }, - { - "x": 1127, - "y": 2545, - "width": 350, - "height": 52 - }, - { - "x": 777, - "y": 2547, - "width": 348, - "height": 53 - }, - { - "x": 427, - "y": 2550, - "width": 348, - "height": 52 - }, - { - "x": 1831, - "y": 2593, - "width": 383, - "height": 54 - }, - { - "x": 1480, - "y": 2596, - "width": 349, - "height": 54 - }, - { - "x": 1127, - "y": 2599, - "width": 351, - "height": 53 - }, - { - "x": 777, - "y": 2601, - "width": 348, - "height": 54 - }, - { - "x": 427, - "y": 2604, - "width": 348, - "height": 53 - }, - { - "x": 1831, - "y": 2649, - "width": 383, - "height": 53 - }, - { - "x": 1480, - "y": 2652, - "width": 349, - "height": 52 - }, - { - "x": 1128, - "y": 2654, - "width": 350, - "height": 53 - }, - { - "x": 777, - "y": 2657, - "width": 348, - "height": 52 - }, - { - "x": 427, - "y": 2659, - "width": 348, - "height": 53 - }, - { - "x": 1832, - "y": 2703, - "width": 382, - "height": 54 - }, - { - "x": 1480, - "y": 2706, - "width": 349, - "height": 53 - }, - { - "x": 1128, - "y": 2709, - "width": 350, - "height": 53 - }, - { - "x": 778, - "y": 2711, - "width": 347, - "height": 53 - }, - { - "x": 427, - "y": 2714, - "width": 348, - "height": 53 - }, - { - "x": 1832, - "y": 2758, - "width": 382, - "height": 53 - }, - { - "x": 1481, - "y": 2761, - "width": 348, - "height": 53 - }, - { - "x": 1128, - "y": 2764, - "width": 350, - "height": 52 - }, - { - "x": 778, - "y": 2766, - "width": 348, - "height": 53 - }, - { - "x": 427, - "y": 2769, - "width": 348, - "height": 52 - }, - { - "x": 1832, - "y": 2812, - "width": 382, - "height": 55 - }, - { - "x": 1481, - "y": 2816, - "width": 349, - "height": 53 - }, - { - "x": 1128, - "y": 2818, - "width": 351, - "height": 54 - }, - { - "x": 778, - "y": 2820, - "width": 348, - "height": 54 - }, - { - "x": 428, - "y": 2823, - "width": 347, - "height": 53 - }, - { - "x": 1832, - "y": 2868, - "width": 382, - "height": 54 - }, - { - "x": 1481, - "y": 2871, - "width": 349, - "height": 53 - }, - { - "x": 1128, - "y": 2873, - "width": 351, - "height": 54 - }, - { - "x": 778, - "y": 2876, - "width": 348, - "height": 53 - }, - { - "x": 428, - "y": 2878, - "width": 348, - "height": 53 - }, - { - "x": 1832, - "y": 2923, - "width": 382, - "height": 53 - }, - { - "x": 1481, - "y": 2926, - "width": 349, - "height": 52 - }, - { - "x": 1128, - "y": 2928, - "width": 351, - "height": 53 - }, - { - "x": 778, - "y": 2931, - "width": 348, - "height": 52 - }, - { - "x": 428, - "y": 2933, - "width": 348, - "height": 52 - }, - { - "x": 1832, - "y": 2978, - "width": 382, - "height": 53 - }, - { - "x": 1481, - "y": 2980, - "width": 349, - "height": 53 - }, - { - "x": 1129, - "y": 2983, - "width": 350, - "height": 53 - }, - { - "x": 778, - "y": 2985, - "width": 348, - "height": 53 - }, - { - "x": 428, - "y": 2987, - "width": 348, - "height": 53 - }, - { - "x": 1832, - "y": 3032, - "width": 382, - "height": 53 - }, - { - "x": 1481, - "y": 3035, - "width": 349, - "height": 52 - }, - { - "x": 1129, - "y": 3038, - "width": 350, - "height": 52 - }, - { - "x": 779, - "y": 3040, - "width": 348, - "height": 52 - }, - { - "x": 428, - "y": 3042, - "width": 349, - "height": 53 - }, - { - "x": 1832, - "y": 3086, - "width": 382, - "height": 55 - }, - { - "x": 1481, - "y": 3089, - "width": 349, - "height": 54 - }, - { - "x": 1129, - "y": 3092, - "width": 350, - "height": 54 - }, - { - "x": 779, - "y": 3094, - "width": 348, - "height": 54 - }, - { - "x": 429, - "y": 3097, - "width": 348, - "height": 53 - }, - { - "x": 1832, - "y": 3141, - "width": 382, - "height": 55 - }, - { - "x": 1481, - "y": 3144, - "width": 349, - "height": 55 - }, - { - "x": 1129, - "y": 3147, - "width": 350, - "height": 54 - }, - { - "x": 779, - "y": 3149, - "width": 349, - "height": 55 - }, - { - "x": 429, - "y": 3152, - "width": 348, - "height": 54 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test10.png b/cv_analysis/test/test_data/test10.png deleted file mode 100644 index 9af83ed..0000000 Binary files a/cv_analysis/test/test_data/test10.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test2.json b/cv_analysis/test/test_data/test2.json deleted file mode 100644 index 1deb655..0000000 --- a/cv_analysis/test/test_data/test2.json +++ /dev/null @@ -1,839 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2481, - "pageHeight": 3509, - "cells": [ - { - "x": 327, - "y": 609, - "width": 353, - "height": 161 - }, - { - "x": 683, - "y": 609, - "width": 311, - "height": 161 - }, - { - "x": 997, - "y": 609, - "width": 531, - "height": 161 - }, - { - "x": 1531, - "y": 609, - "width": 247, - "height": 161 - }, - { - "x": 1781, - "y": 609, - "width": 246, - "height": 161 - }, - { - "x": 2030, - "y": 609, - "width": 246, - "height": 161 - }, - { - "x": 327, - "y": 773, - "width": 353, - "height": 272 - }, - { - "x": 683, - "y": 773, - "width": 311, - "height": 65 - }, - { - "x": 997, - "y": 773, - "width": 531, - "height": 65 - }, - { - "x": 1531, - "y": 773, - "width": 247, - "height": 65 - }, - { - "x": 1781, - "y": 773, - "width": 246, - "height": 65 - }, - { - "x": 2030, - "y": 773, - "width": 246, - "height": 65 - }, - { - "x": 683, - "y": 841, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 841, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 841, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 841, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 841, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 910, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 910, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 910, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 910, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 910, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 979, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 979, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 979, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 979, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 979, - "width": 246, - "height": 66 - }, - { - "x": 327, - "y": 1047, - "width": 353, - "height": 273 - }, - { - "x": 683, - "y": 1048, - "width": 311, - "height": 65 - }, - { - "x": 997, - "y": 1048, - "width": 531, - "height": 65 - }, - { - "x": 1531, - "y": 1048, - "width": 247, - "height": 65 - }, - { - "x": 1781, - "y": 1048, - "width": 246, - "height": 65 - }, - { - "x": 2030, - "y": 1048, - "width": 246, - "height": 65 - }, - { - "x": 683, - "y": 1116, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 1116, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 1116, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 1116, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 1116, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 1185, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 1185, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 1185, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 1185, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 1185, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 1254, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 1254, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 1254, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 1254, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 1254, - "width": 246, - "height": 66 - }, - { - "x": 327, - "y": 1322, - "width": 353, - "height": 273 - }, - { - "x": 683, - "y": 1323, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 1323, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 1323, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 1323, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 1323, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 1392, - "width": 311, - "height": 65 - }, - { - "x": 997, - "y": 1392, - "width": 531, - "height": 65 - }, - { - "x": 1531, - "y": 1392, - "width": 247, - "height": 65 - }, - { - "x": 1781, - "y": 1392, - "width": 246, - "height": 65 - }, - { - "x": 2030, - "y": 1392, - "width": 246, - "height": 65 - }, - { - "x": 683, - "y": 1460, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 1460, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 1460, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 1460, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 1460, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 1529, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 1529, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 1529, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 1529, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 1529, - "width": 246, - "height": 66 - }, - { - "x": 327, - "y": 1849, - "width": 353, - "height": 161 - }, - { - "x": 683, - "y": 1849, - "width": 311, - "height": 161 - }, - { - "x": 997, - "y": 1849, - "width": 531, - "height": 161 - }, - { - "x": 1531, - "y": 1849, - "width": 247, - "height": 161 - }, - { - "x": 1781, - "y": 1849, - "width": 246, - "height": 161 - }, - { - "x": 2030, - "y": 1849, - "width": 246, - "height": 161 - }, - { - "x": 327, - "y": 2013, - "width": 353, - "height": 272 - }, - { - "x": 683, - "y": 2013, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2013, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2013, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2013, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2013, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 2082, - "width": 311, - "height": 65 - }, - { - "x": 997, - "y": 2082, - "width": 531, - "height": 65 - }, - { - "x": 1531, - "y": 2082, - "width": 247, - "height": 65 - }, - { - "x": 1781, - "y": 2082, - "width": 246, - "height": 65 - }, - { - "x": 2030, - "y": 2082, - "width": 246, - "height": 65 - }, - { - "x": 683, - "y": 2150, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2150, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2150, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2150, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2150, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 2219, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2219, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2219, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2219, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2219, - "width": 246, - "height": 66 - }, - { - "x": 327, - "y": 2287, - "width": 353, - "height": 273 - }, - { - "x": 683, - "y": 2288, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2288, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2288, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2288, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2288, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 2357, - "width": 311, - "height": 65 - }, - { - "x": 997, - "y": 2357, - "width": 531, - "height": 65 - }, - { - "x": 1531, - "y": 2357, - "width": 247, - "height": 65 - }, - { - "x": 1781, - "y": 2357, - "width": 246, - "height": 65 - }, - { - "x": 2030, - "y": 2357, - "width": 246, - "height": 65 - }, - { - "x": 683, - "y": 2425, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2425, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2425, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2425, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2425, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 2494, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2494, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2494, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2494, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2494, - "width": 246, - "height": 66 - }, - { - "x": 327, - "y": 2562, - "width": 353, - "height": 273 - }, - { - "x": 683, - "y": 2563, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2563, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2563, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2563, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2563, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 2632, - "width": 311, - "height": 65 - }, - { - "x": 997, - "y": 2632, - "width": 531, - "height": 65 - }, - { - "x": 1531, - "y": 2632, - "width": 247, - "height": 65 - }, - { - "x": 1781, - "y": 2632, - "width": 246, - "height": 65 - }, - { - "x": 2030, - "y": 2632, - "width": 246, - "height": 65 - }, - { - "x": 683, - "y": 2700, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2700, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2700, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2700, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2700, - "width": 246, - "height": 66 - }, - { - "x": 683, - "y": 2769, - "width": 311, - "height": 66 - }, - { - "x": 997, - "y": 2769, - "width": 531, - "height": 66 - }, - { - "x": 1531, - "y": 2769, - "width": 247, - "height": 66 - }, - { - "x": 1781, - "y": 2769, - "width": 246, - "height": 66 - }, - { - "x": 2030, - "y": 2769, - "width": 246, - "height": 66 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test2.png b/cv_analysis/test/test_data/test2.png deleted file mode 100644 index 96a4d5e..0000000 Binary files a/cv_analysis/test/test_data/test2.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test3.json b/cv_analysis/test/test_data/test3.json deleted file mode 100644 index b451279..0000000 --- a/cv_analysis/test/test_data/test3.json +++ /dev/null @@ -1,233 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2481, - "pageHeight": 3506, - "cells": [ - { - "x": 195, - "y": 1519, - "width": 2091, - "height": 84 - }, - { - "x": 195, - "y": 1604, - "width": 583, - "height": 123 - }, - { - "x": 783, - "y": 1605, - "width": 1503, - "height": 124 - }, - { - "x": 195, - "y": 1730, - "width": 583, - "height": 65 - }, - { - "x": 783, - "y": 1731, - "width": 1502, - "height": 66 - }, - { - "x": 195, - "y": 1798, - "width": 583, - "height": 65 - }, - { - "x": 783, - "y": 1799, - "width": 1502, - "height": 66 - }, - { - "x": 195, - "y": 1866, - "width": 583, - "height": 65 - }, - { - "x": 782, - "y": 1867, - "width": 1503, - "height": 66 - }, - { - "x": 195, - "y": 1934, - "width": 583, - "height": 65 - }, - { - "x": 782, - "y": 1935, - "width": 1503, - "height": 66 - }, - { - "x": 194, - "y": 2003, - "width": 584, - "height": 64 - }, - { - "x": 782, - "y": 2003, - "width": 535, - "height": 66 - }, - { - "x": 1321, - "y": 2005, - "width": 455, - "height": 64 - }, - { - "x": 1780, - "y": 2005, - "width": 505, - "height": 65 - }, - { - "x": 193, - "y": 2071, - "width": 585, - "height": 65 - }, - { - "x": 782, - "y": 2071, - "width": 535, - "height": 66 - }, - { - "x": 1321, - "y": 2073, - "width": 455, - "height": 64 - }, - { - "x": 1780, - "y": 2073, - "width": 505, - "height": 65 - }, - { - "x": 193, - "y": 2139, - "width": 585, - "height": 65 - }, - { - "x": 782, - "y": 2140, - "width": 535, - "height": 65 - }, - { - "x": 1321, - "y": 2141, - "width": 455, - "height": 64 - }, - { - "x": 1780, - "y": 2141, - "width": 505, - "height": 65 - }, - { - "x": 193, - "y": 2207, - "width": 585, - "height": 65 - }, - { - "x": 782, - "y": 2208, - "width": 535, - "height": 65 - }, - { - "x": 1321, - "y": 2209, - "width": 455, - "height": 64 - }, - { - "x": 1780, - "y": 2210, - "width": 505, - "height": 64 - }, - { - "x": 193, - "y": 2275, - "width": 585, - "height": 66 - }, - { - "x": 782, - "y": 2276, - "width": 535, - "height": 65 - }, - { - "x": 1321, - "y": 2277, - "width": 455, - "height": 65 - }, - { - "x": 1780, - "y": 2278, - "width": 505, - "height": 65 - }, - { - "x": 193, - "y": 2343, - "width": 584, - "height": 66 - }, - { - "x": 782, - "y": 2344, - "width": 1503, - "height": 67 - }, - { - "x": 193, - "y": 2412, - "width": 584, - "height": 65 - }, - { - "x": 781, - "y": 2413, - "width": 1504, - "height": 66 - }, - { - "x": 193, - "y": 2480, - "width": 584, - "height": 65 - }, - { - "x": 781, - "y": 2481, - "width": 1504, - "height": 66 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test3.png b/cv_analysis/test/test_data/test3.png deleted file mode 100644 index 036b455..0000000 Binary files a/cv_analysis/test/test_data/test3.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test4.json b/cv_analysis/test/test_data/test4.json deleted file mode 100644 index 0ed435f..0000000 --- a/cv_analysis/test/test_data/test4.json +++ /dev/null @@ -1,203 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 3508, - "pageHeight": 2481, - "cells": [ - { - "x": 299, - "y": 761, - "width": 232, - "height": 354 - }, - { - "x": 533, - "y": 761, - "width": 235, - "height": 354 - }, - { - "x": 770, - "y": 761, - "width": 205, - "height": 354 - }, - { - "x": 977, - "y": 761, - "width": 211, - "height": 354 - }, - { - "x": 1190, - "y": 761, - "width": 425, - "height": 138 - }, - { - "x": 1617, - "y": 761, - "width": 195, - "height": 354 - }, - { - "x": 1814, - "y": 761, - "width": 168, - "height": 354 - }, - { - "x": 1984, - "y": 761, - "width": 184, - "height": 354 - }, - { - "x": 2170, - "y": 761, - "width": 191, - "height": 354 - }, - { - "x": 2363, - "y": 761, - "width": 274, - "height": 138 - }, - { - "x": 2639, - "y": 761, - "width": 159, - "height": 354 - }, - { - "x": 2800, - "y": 761, - "width": 466, - "height": 354 - }, - { - "x": 1190, - "y": 901, - "width": 141, - "height": 214 - }, - { - "x": 1333, - "y": 901, - "width": 123, - "height": 214 - }, - { - "x": 1458, - "y": 901, - "width": 157, - "height": 214 - }, - { - "x": 2363, - "y": 901, - "width": 130, - "height": 214 - }, - { - "x": 2495, - "y": 901, - "width": 142, - "height": 214 - }, - { - "x": 299, - "y": 1121, - "width": 232, - "height": 581 - }, - { - "x": 533, - "y": 1121, - "width": 235, - "height": 581 - }, - { - "x": 770, - "y": 1121, - "width": 205, - "height": 581 - }, - { - "x": 977, - "y": 1121, - "width": 211, - "height": 581 - }, - { - "x": 1190, - "y": 1121, - "width": 141, - "height": 581 - }, - { - "x": 1333, - "y": 1121, - "width": 123, - "height": 581 - }, - { - "x": 1458, - "y": 1121, - "width": 157, - "height": 581 - }, - { - "x": 1617, - "y": 1121, - "width": 195, - "height": 581 - }, - { - "x": 1814, - "y": 1121, - "width": 168, - "height": 581 - }, - { - "x": 1984, - "y": 1121, - "width": 184, - "height": 581 - }, - { - "x": 2170, - "y": 1121, - "width": 191, - "height": 581 - }, - { - "x": 2363, - "y": 1121, - "width": 130, - "height": 581 - }, - { - "x": 2495, - "y": 1121, - "width": 142, - "height": 581 - }, - { - "x": 2639, - "y": 1121, - "width": 159, - "height": 581 - }, - { - "x": 2800, - "y": 1121, - "width": 466, - "height": 581 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test4.png b/cv_analysis/test/test_data/test4.png deleted file mode 100644 index 181a372..0000000 Binary files a/cv_analysis/test/test_data/test4.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test5.json b/cv_analysis/test/test_data/test5.json deleted file mode 100644 index b2d49bf..0000000 --- a/cv_analysis/test/test_data/test5.json +++ /dev/null @@ -1,563 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2481, - "pageHeight": 3508, - "cells": [ - { - "x": 299, - "y": 706, - "width": 154, - "height": 411 - }, - { - "x": 455, - "y": 706, - "width": 239, - "height": 411 - }, - { - "x": 696, - "y": 706, - "width": 210, - "height": 411 - }, - { - "x": 908, - "y": 706, - "width": 818, - "height": 56 - }, - { - "x": 1728, - "y": 706, - "width": 376, - "height": 56 - }, - { - "x": 2106, - "y": 706, - "width": 138, - "height": 411 - }, - { - "x": 908, - "y": 764, - "width": 200, - "height": 353 - }, - { - "x": 1110, - "y": 764, - "width": 214, - "height": 353 - }, - { - "x": 1326, - "y": 764, - "width": 167, - "height": 353 - }, - { - "x": 1495, - "y": 764, - "width": 231, - "height": 353 - }, - { - "x": 1728, - "y": 764, - "width": 230, - "height": 353 - }, - { - "x": 1960, - "y": 764, - "width": 144, - "height": 353 - }, - { - "x": 299, - "y": 1123, - "width": 154, - "height": 283 - }, - { - "x": 455, - "y": 1123, - "width": 239, - "height": 283 - }, - { - "x": 696, - "y": 1123, - "width": 210, - "height": 283 - }, - { - "x": 908, - "y": 1123, - "width": 200, - "height": 283 - }, - { - "x": 1110, - "y": 1123, - "width": 214, - "height": 283 - }, - { - "x": 1326, - "y": 1123, - "width": 167, - "height": 283 - }, - { - "x": 1495, - "y": 1123, - "width": 231, - "height": 283 - }, - { - "x": 1728, - "y": 1123, - "width": 230, - "height": 283 - }, - { - "x": 1960, - "y": 1123, - "width": 144, - "height": 283 - }, - { - "x": 2106, - "y": 1123, - "width": 138, - "height": 283 - }, - { - "x": 299, - "y": 1408, - "width": 154, - "height": 284 - }, - { - "x": 455, - "y": 1408, - "width": 239, - "height": 284 - }, - { - "x": 696, - "y": 1408, - "width": 210, - "height": 284 - }, - { - "x": 908, - "y": 1408, - "width": 200, - "height": 284 - }, - { - "x": 1110, - "y": 1408, - "width": 214, - "height": 284 - }, - { - "x": 1326, - "y": 1408, - "width": 167, - "height": 284 - }, - { - "x": 1495, - "y": 1408, - "width": 231, - "height": 284 - }, - { - "x": 1728, - "y": 1408, - "width": 230, - "height": 284 - }, - { - "x": 1960, - "y": 1408, - "width": 144, - "height": 284 - }, - { - "x": 2106, - "y": 1408, - "width": 138, - "height": 284 - }, - { - "x": 299, - "y": 2090, - "width": 169, - "height": 211 - }, - { - "x": 470, - "y": 2090, - "width": 253, - "height": 211 - }, - { - "x": 725, - "y": 2090, - "width": 229, - "height": 211 - }, - { - "x": 956, - "y": 2090, - "width": 516, - "height": 57 - }, - { - "x": 1474, - "y": 2090, - "width": 496, - "height": 57 - }, - { - "x": 1972, - "y": 2090, - "width": 272, - "height": 211 - }, - { - "x": 956, - "y": 2149, - "width": 184, - "height": 152 - }, - { - "x": 1142, - "y": 2149, - "width": 158, - "height": 152 - }, - { - "x": 1302, - "y": 2149, - "width": 170, - "height": 152 - }, - { - "x": 1474, - "y": 2149, - "width": 262, - "height": 152 - }, - { - "x": 1738, - "y": 2149, - "width": 232, - "height": 152 - }, - { - "x": 299, - "y": 2303, - "width": 169, - "height": 56 - }, - { - "x": 470, - "y": 2303, - "width": 253, - "height": 56 - }, - { - "x": 725, - "y": 2303, - "width": 229, - "height": 56 - }, - { - "x": 956, - "y": 2303, - "width": 184, - "height": 56 - }, - { - "x": 1142, - "y": 2303, - "width": 158, - "height": 56 - }, - { - "x": 1302, - "y": 2303, - "width": 170, - "height": 56 - }, - { - "x": 1474, - "y": 2303, - "width": 262, - "height": 56 - }, - { - "x": 1738, - "y": 2303, - "width": 232, - "height": 56 - }, - { - "x": 1972, - "y": 2303, - "width": 272, - "height": 56 - }, - { - "x": 299, - "y": 2361, - "width": 169, - "height": 204 - }, - { - "x": 470, - "y": 2361, - "width": 253, - "height": 204 - }, - { - "x": 725, - "y": 2361, - "width": 229, - "height": 97 - }, - { - "x": 956, - "y": 2361, - "width": 184, - "height": 97 - }, - { - "x": 1142, - "y": 2361, - "width": 158, - "height": 204 - }, - { - "x": 1302, - "y": 2361, - "width": 170, - "height": 204 - }, - { - "x": 1474, - "y": 2361, - "width": 262, - "height": 97 - }, - { - "x": 1738, - "y": 2361, - "width": 232, - "height": 97 - }, - { - "x": 1972, - "y": 2361, - "width": 272, - "height": 204 - }, - { - "x": 725, - "y": 2460, - "width": 229, - "height": 105 - }, - { - "x": 956, - "y": 2460, - "width": 184, - "height": 105 - }, - { - "x": 1474, - "y": 2460, - "width": 262, - "height": 105 - }, - { - "x": 1738, - "y": 2460, - "width": 232, - "height": 105 - }, - { - "x": 299, - "y": 2567, - "width": 169, - "height": 205 - }, - { - "x": 470, - "y": 2567, - "width": 253, - "height": 205 - }, - { - "x": 725, - "y": 2567, - "width": 229, - "height": 205 - }, - { - "x": 956, - "y": 2567, - "width": 184, - "height": 205 - }, - { - "x": 1142, - "y": 2567, - "width": 158, - "height": 205 - }, - { - "x": 1302, - "y": 2567, - "width": 170, - "height": 205 - }, - { - "x": 1474, - "y": 2567, - "width": 262, - "height": 205 - }, - { - "x": 1738, - "y": 2567, - "width": 232, - "height": 205 - }, - { - "x": 1972, - "y": 2567, - "width": 272, - "height": 205 - }, - { - "x": 299, - "y": 2774, - "width": 169, - "height": 56 - }, - { - "x": 470, - "y": 2774, - "width": 253, - "height": 56 - }, - { - "x": 725, - "y": 2774, - "width": 229, - "height": 56 - }, - { - "x": 956, - "y": 2774, - "width": 184, - "height": 56 - }, - { - "x": 1142, - "y": 2774, - "width": 158, - "height": 56 - }, - { - "x": 1302, - "y": 2774, - "width": 170, - "height": 56 - }, - { - "x": 1474, - "y": 2774, - "width": 262, - "height": 56 - }, - { - "x": 1738, - "y": 2774, - "width": 232, - "height": 56 - }, - { - "x": 1972, - "y": 2774, - "width": 272, - "height": 56 - }, - { - "x": 299, - "y": 2832, - "width": 169, - "height": 205 - }, - { - "x": 470, - "y": 2832, - "width": 253, - "height": 205 - }, - { - "x": 725, - "y": 2832, - "width": 229, - "height": 205 - }, - { - "x": 956, - "y": 2832, - "width": 184, - "height": 205 - }, - { - "x": 1142, - "y": 2832, - "width": 158, - "height": 205 - }, - { - "x": 1302, - "y": 2832, - "width": 170, - "height": 205 - }, - { - "x": 1474, - "y": 2832, - "width": 262, - "height": 205 - }, - { - "x": 1738, - "y": 2832, - "width": 232, - "height": 205 - }, - { - "x": 1972, - "y": 2832, - "width": 272, - "height": 205 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test5.png b/cv_analysis/test/test_data/test5.png deleted file mode 100644 index e678300..0000000 Binary files a/cv_analysis/test/test_data/test5.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test6.json b/cv_analysis/test/test_data/test6.json deleted file mode 100644 index ec866c9..0000000 --- a/cv_analysis/test/test_data/test6.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2480, - "pageHeight": 3509, - "cells": [ - { - "x": 494, - "y": 960, - "width": 562, - "height": 453 - }, - { - "x": 1059, - "y": 962, - "width": 1065, - "height": 224 - }, - { - "x": 1060, - "y": 1190, - "width": 1066, - "height": 225 - }, - { - "x": 500, - "y": 1419, - "width": 559, - "height": 521 - }, - { - "x": 1060, - "y": 1420, - "width": 1070, - "height": 521 - }, - { - "x": 506, - "y": 1945, - "width": 553, - "height": 222 - }, - { - "x": 1063, - "y": 1946, - "width": 1070, - "height": 223 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test6.png b/cv_analysis/test/test_data/test6.png deleted file mode 100644 index 1461f7e..0000000 Binary files a/cv_analysis/test/test_data/test6.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test7.json b/cv_analysis/test/test_data/test7.json deleted file mode 100644 index b007ce2..0000000 --- a/cv_analysis/test/test_data/test7.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2471, - "pageHeight": 3505, - "cells": [ - { - "x": 572, - "y": 725, - "width": 451, - "height": 1785 - }, - { - "x": 1025, - "y": 725, - "width": 505, - "height": 1785 - }, - { - "x": 1533, - "y": 724, - "width": 454, - "height": 1786 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test7.png b/cv_analysis/test/test_data/test7.png deleted file mode 100644 index 7a7b5b6..0000000 Binary files a/cv_analysis/test/test_data/test7.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test8.json b/cv_analysis/test/test_data/test8.json deleted file mode 100644 index ed6e7c5..0000000 --- a/cv_analysis/test/test_data/test8.json +++ /dev/null @@ -1,179 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2471, - "pageHeight": 3505, - "cells": [ - { - "x": 1292, - "y": 1009, - "width": 264, - "height": 132 - }, - { - "x": 538, - "y": 1015, - "width": 254, - "height": 124 - }, - { - "x": 797, - "y": 1016, - "width": 253, - "height": 124 - }, - { - "x": 1056, - "y": 1017, - "width": 230, - "height": 123 - }, - { - "x": 538, - "y": 1144, - "width": 253, - "height": 81 - }, - { - "x": 797, - "y": 1145, - "width": 253, - "height": 82 - }, - { - "x": 1056, - "y": 1146, - "width": 230, - "height": 81 - }, - { - "x": 1292, - "y": 1146, - "width": 257, - "height": 81 - }, - { - "x": 538, - "y": 1231, - "width": 253, - "height": 39 - }, - { - "x": 797, - "y": 1232, - "width": 253, - "height": 39 - }, - { - "x": 1055, - "y": 1233, - "width": 230, - "height": 38 - }, - { - "x": 1291, - "y": 1233, - "width": 258, - "height": 38 - }, - { - "x": 538, - "y": 1277, - "width": 253, - "height": 80 - }, - { - "x": 797, - "y": 1277, - "width": 252, - "height": 80 - }, - { - "x": 1055, - "y": 1278, - "width": 230, - "height": 80 - }, - { - "x": 1291, - "y": 1278, - "width": 258, - "height": 80 - }, - { - "x": 538, - "y": 1362, - "width": 253, - "height": 40 - }, - { - "x": 797, - "y": 1363, - "width": 253, - "height": 40 - }, - { - "x": 1055, - "y": 1363, - "width": 230, - "height": 40 - }, - { - "x": 1291, - "y": 1363, - "width": 258, - "height": 40 - }, - { - "x": 538, - "y": 1407, - "width": 253, - "height": 82 - }, - { - "x": 797, - "y": 1408, - "width": 253, - "height": 81 - }, - { - "x": 1055, - "y": 1408, - "width": 231, - "height": 82 - }, - { - "x": 1291, - "y": 1409, - "width": 258, - "height": 81 - }, - { - "x": 538, - "y": 1494, - "width": 254, - "height": 209 - }, - { - "x": 797, - "y": 1494, - "width": 253, - "height": 209 - }, - { - "x": 1055, - "y": 1495, - "width": 231, - "height": 209 - }, - { - "x": 1291, - "y": 1495, - "width": 265, - "height": 214 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test8.png b/cv_analysis/test/test_data/test8.png deleted file mode 100644 index 2ba4f3c..0000000 Binary files a/cv_analysis/test/test_data/test8.png and /dev/null differ diff --git a/cv_analysis/test/test_data/test9.json b/cv_analysis/test/test_data/test9.json deleted file mode 100644 index 3b0dc49..0000000 --- a/cv_analysis/test/test_data/test9.json +++ /dev/null @@ -1,335 +0,0 @@ -{ - "pages": [ - { - "page": 0, - "pageWidth": 2481, - "pageHeight": 3506, - "cells": [ - { - "x": 1913, - "y": 602, - "width": 56, - "height": 670 - }, - { - "x": 1973, - "y": 602, - "width": 56, - "height": 670 - }, - { - "x": 2032, - "y": 603, - "width": 57, - "height": 669 - }, - { - "x": 605, - "y": 647, - "width": 389, - "height": 430 - }, - { - "x": 997, - "y": 646, - "width": 210, - "height": 432 - }, - { - "x": 1211, - "y": 647, - "width": 210, - "height": 432 - }, - { - "x": 1425, - "y": 647, - "width": 66, - "height": 432 - }, - { - "x": 1496, - "y": 647, - "width": 65, - "height": 432 - }, - { - "x": 605, - "y": 1083, - "width": 388, - "height": 350 - }, - { - "x": 997, - "y": 1083, - "width": 209, - "height": 352 - }, - { - "x": 1210, - "y": 1083, - "width": 211, - "height": 352 - }, - { - "x": 1424, - "y": 1083, - "width": 67, - "height": 352 - }, - { - "x": 1495, - "y": 1083, - "width": 66, - "height": 352 - }, - { - "x": 1912, - "y": 1275, - "width": 57, - "height": 669 - }, - { - "x": 1972, - "y": 1275, - "width": 57, - "height": 670 - }, - { - "x": 2032, - "y": 1276, - "width": 57, - "height": 669 - }, - { - "x": 604, - "y": 1439, - "width": 389, - "height": 498 - }, - { - "x": 996, - "y": 1439, - "width": 67, - "height": 499 - }, - { - "x": 1067, - "y": 1439, - "width": 67, - "height": 499 - }, - { - "x": 1138, - "y": 1439, - "width": 68, - "height": 500 - }, - { - "x": 1210, - "y": 1439, - "width": 67, - "height": 500 - }, - { - "x": 1280, - "y": 1439, - "width": 67, - "height": 500 - }, - { - "x": 1351, - "y": 1439, - "width": 69, - "height": 500 - }, - { - "x": 1424, - "y": 1439, - "width": 67, - "height": 500 - }, - { - "x": 1495, - "y": 1439, - "width": 65, - "height": 500 - }, - { - "x": 603, - "y": 1943, - "width": 389, - "height": 291 - }, - { - "x": 996, - "y": 1943, - "width": 209, - "height": 292 - }, - { - "x": 1209, - "y": 1943, - "width": 210, - "height": 292 - }, - { - "x": 1424, - "y": 1943, - "width": 67, - "height": 292 - }, - { - "x": 1494, - "y": 1943, - "width": 66, - "height": 293 - }, - { - "x": 1911, - "y": 1948, - "width": 56, - "height": 669 - }, - { - "x": 1971, - "y": 1948, - "width": 56, - "height": 669 - }, - { - "x": 2030, - "y": 1949, - "width": 57, - "height": 669 - }, - { - "x": 603, - "y": 2239, - "width": 388, - "height": 304 - }, - { - "x": 995, - "y": 2239, - "width": 67, - "height": 305 - }, - { - "x": 1066, - "y": 2239, - "width": 67, - "height": 306 - }, - { - "x": 1137, - "y": 2239, - "width": 68, - "height": 306 - }, - { - "x": 1209, - "y": 2239, - "width": 66, - "height": 306 - }, - { - "x": 1280, - "y": 2240, - "width": 67, - "height": 305 - }, - { - "x": 1351, - "y": 2240, - "width": 68, - "height": 305 - }, - { - "x": 1423, - "y": 2240, - "width": 67, - "height": 305 - }, - { - "x": 1494, - "y": 2240, - "width": 65, - "height": 305 - }, - { - "x": 601, - "y": 2548, - "width": 390, - "height": 783 - }, - { - "x": 995, - "y": 2548, - "width": 66, - "height": 783 - }, - { - "x": 1065, - "y": 2548, - "width": 68, - "height": 783 - }, - { - "x": 1137, - "y": 2549, - "width": 68, - "height": 782 - }, - { - "x": 1209, - "y": 2549, - "width": 66, - "height": 782 - }, - { - "x": 1279, - "y": 2549, - "width": 67, - "height": 782 - }, - { - "x": 1350, - "y": 2549, - "width": 69, - "height": 782 - }, - { - "x": 1423, - "y": 2549, - "width": 67, - "height": 782 - }, - { - "x": 1493, - "y": 2549, - "width": 66, - "height": 782 - }, - { - "x": 1910, - "y": 2622, - "width": 57, - "height": 666 - }, - { - "x": 1970, - "y": 2622, - "width": 57, - "height": 667 - }, - { - "x": 2030, - "y": 2622, - "width": 56, - "height": 667 - } - ] - } - ] -} \ No newline at end of file diff --git a/cv_analysis/test/test_data/test9.png b/cv_analysis/test/test_data/test9.png deleted file mode 100644 index 1c6d1b3..0000000 Binary files a/cv_analysis/test/test_data/test9.png and /dev/null differ diff --git a/cv_analysis/test/unit_tests/table_test.py b/cv_analysis/test/unit_tests/table_test.py deleted file mode 100644 index 4210754..0000000 --- a/cv_analysis/test/unit_tests/table_test.py +++ /dev/null @@ -1,45 +0,0 @@ -from os.path import join -import json - -from cv_analysis.table_parsing import parse_tables -from cv_analysis.locations import TEST_DATA_DIR -from cv_analysis.test.config import TEST_CONFIG -from cv_analysis.utils.test_metrics import compute_document_score -from cv_analysis.utils.preprocessing import open_pdf - - -def test_table_parsing(): - for i in range(1, 11): - - img_path = join(TEST_DATA_DIR, f"test{i}.png") - json_path = join(TEST_DATA_DIR, f"test{i}.json") - pages = open_pdf(img_path) - - result = {"pages": []} - for i, page in enumerate(pages): - result["pages"].append({"page": str(i), "cells": [x.json_xywh() for x in parse_tables(page)]}) - with open(json_path) as f: - annotation = json.load(f) - - score = compute_document_score(result, annotation) - - assert round(score, 3) >= TEST_CONFIG.table_score_threshold - - -""" -def test_table_parsing(): - - img_path = join(TEST_DATA_DIR, "table.jpg") - json_path = join(TEST_DATA_DIR, "table.json") - pages = open_pdf(img_path) - - result = {"pages": []} - for i, page in enumerate(pages): - result["pages"].append({"page": str(i), "cells": [x.xywh() for x in parse_tables(page)]}) - with open(json_path) as f: - annotation = json.load(f) - - score = compute_document_score(result, annotation) - - assert score >= TEST_CONFIG.table_score_threshold -""" diff --git a/cv_analysis/utils/banner.py b/cv_analysis/utils/banner.py new file mode 100644 index 0000000..a483707 --- /dev/null +++ b/cv_analysis/utils/banner.py @@ -0,0 +1,16 @@ +def make_art(): + art = r""" + __ __ + | \ | \ + _______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______ + / \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \ +| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$ +| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \ +| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\ + \$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$ + \$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$ + | \__| $$ + \$$ $$ + \$$$$$$ +""" + return art diff --git a/incl/pyinfra b/incl/pyinfra new file mode 160000 index 0000000..fb0b64f --- /dev/null +++ b/incl/pyinfra @@ -0,0 +1 @@ +Subproject commit fb0b64f8d55933e9651fde5eec0175e7d317655d diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..cd0d17d --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +norecursedirs = incl diff --git a/requirements.txt b/requirements.txt index cc44721..202a3b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,5 @@ envyaml~=1.8 coverage~=5.5 dependency-check~=0.6.0 prometheus-client~=0.13.1 -prometheus_flask_exporter~=0.19.0 \ No newline at end of file +prometheus_flask_exporter~=0.19.0 +funcy==1.17 \ No newline at end of file diff --git a/src/run_service.py b/src/run_service.py deleted file mode 100644 index a86664e..0000000 --- a/src/run_service.py +++ /dev/null @@ -1,139 +0,0 @@ -import json -import tracemalloc -from sys import getsizeof -import logging -from flask import Flask, request, jsonify -from prometheus_client import Counter, Gauge -from prometheus_flask_exporter import PrometheusMetrics -from waitress import serve - -from cv_analysis.utils import npconvert -from cv_analysis.table_parsing import parse_tables -from cv_analysis.redaction_detection import find_redactions -from cv_analysis.layout_parsing import parse_layout -from cv_analysis.figure_detection import detect_figures -from cv_analysis.utils.logging import logger -from cv_analysis.utils.preprocessing import open_pdf -from cv_analysis.utils.structures import Rectangle -from cv_analysis.config import CONFIG - - -def suppress_user_warnings(): - import warnings - - warnings.filterwarnings("ignore") - - -def main(): - file_counter = Counter("cv_analysis_file_counter", "count processed files") - ram_metric = Gauge("cv_analysis_memory_usage", "Memory usage in Mb") - - def start_monitoring(): - file_counter.inc() - _, peak = tracemalloc.get_traced_memory() - ram_metric.set(peak / 10**6) - - logger.info(make_art()) - tracemalloc.start() - - app = Flask(__name__) - metrics = PrometheusMetrics(app=app, path="/prometheus") - - @app.route("/tables", methods=["POST"]) - @metrics.summary("tables_request_time_seconds", "Time spent processing tables request") - def get_tables(): - start_monitoring() - tables = annotate(parse_tables) - return tables - - @app.route("/redactions", methods=["POST"]) - @metrics.summary("redactions_request_time_seconds", "Time spent processing redaction request") - def get_redactions(): - start_monitoring() - redactions = annotate(find_redactions) - return redactions - - @app.route("/figures", methods=["POST"]) - @metrics.summary("figures_request_time_seconds", "Time spent processing figures request") - def get_figures(): - start_monitoring() - figures = annotate(detect_figures) - return figures - - @app.route("/layout", methods=["POST"]) - @metrics.summary("layout_request_time_seconds", "Time spent processing layout request") - def get_layout(): - start_monitoring() - layout = annotate(parse_layout) - return layout - - @app.route("/status", methods=["GET"]) - def status(): - response = "OK" - return jsonify(response) - - logger.info("<3 Annotator ready.") - - mode = CONFIG.webserver.mode - if mode == "development": - app.run(host=CONFIG.webserver.host, port=CONFIG.webserver.port, debug=True) - elif mode == "production": - serve(app, host=CONFIG.webserver.host, port=CONFIG.webserver.port) - logging.info("Production.") - tracemalloc.stop() - - -def make_annotations(pdf, annotation_function): - results = [] - for i, page in enumerate(pdf): - boxes = annotation_function(page) - cells = list(map(lambda x: x.json_xywh(), boxes)) - results.append({"page": i, "pageWidth": page.shape[1], "pageHeight": page.shape[0], "cells": cells}) - output_dict = {"pages": results} - return jsonify(json.dumps(output_dict, default=npconvert)) - - -def get_size(data): - return round(getsizeof(data) / 1000000, 2) - - -def annotate(annotation_function): - def inner(): - data = request.data - logger.info(f"Received data.") - logger.info(f"Processing data.") - pdf = open_pdf(data) - annotations = make_annotations(pdf, annotation_function) - return annotations - - try: - return inner() - except Exception as err: - logger.warning("Analysis failed") - logger.exception(err) - resp = jsonify("Analysis failed") - resp.status_code = 500 - return resp - - -def make_art(): - art = r""" - __ __ - | \ | \ - _______ __ __ ______ _______ ______ | $$ __ __ _______ \$$ _______ - / \| \ / \ ______ | \ | \ | \ | $$| \ | \ / \| \ / \ -| $$$$$$$ \$$\ / $$| \ \$$$$$$\| $$$$$$$\ \$$$$$$\| $$| $$ | $$| $$$$$$$| $$| $$$$$$$ -| $$ \$$\ $$ \$$$$$$/ $$| $$ | $$ / $$| $$| $$ | $$ \$$ \ | $$ \$$ \ -| $$_____ \$$ $$ | $$$$$$$| $$ | $$| $$$$$$$| $$| $$__/ $$ _\$$$$$$\| $$ _\$$$$$$\ - \$$ \ \$$$ \$$ $$| $$ | $$ \$$ $$| $$ \$$ $$| $$| $$| $$ - \$$$$$$$ \$ \$$$$$$$ \$$ \$$ \$$$$$$$ \$$ _\$$$$$$$ \$$$$$$$ \$$ \$$$$$$$ - | \__| $$ - \$$ $$ - \$$$$$$ - -""" - return art - - -if __name__ == "__main__": - main() diff --git a/src/serve.py b/src/serve.py new file mode 100644 index 0000000..c16acf1 --- /dev/null +++ b/src/serve.py @@ -0,0 +1,39 @@ +import logging + +from waitress import serve + +from cv_analysis.config import CONFIG +from cv_analysis.pyinfra_compat import make_streamable_analysis_fn, get_analysis_fn +from cv_analysis.utils.banner import make_art +from cv_analysis.utils.logging import get_logger +from incl.pyinfra.pyinfra.server.server import set_up_processing_server + + +def main(): + logger.info(make_art()) + + operations = ["table_parsing"] + operation2function = {op: make_streamable_analysis_fn(get_analysis_fn(op)) for op in operations} + + server = set_up_processing_server(operation2function) + + host = CONFIG.webserver.host + port = CONFIG.webserver.port + + logger.info(f"Ready, serving on http://{host}/{port}") + + serve(server, host=host, port=port, _quiet=False) + + +if __name__ == "__main__": + logging.basicConfig(level=CONFIG.service.logging_level) + + logging.getLogger("pillow").setLevel(logging.ERROR) + logging.getLogger("pika").setLevel(logging.ERROR) + logging.getLogger("flask").setLevel(logging.ERROR) + logging.getLogger("waitress").setLevel(logging.ERROR) + logging.getLogger("urllib3").setLevel(logging.ERROR) + + logger = get_logger() + + main() diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 0000000..81a860e --- /dev/null +++ b/test/.gitignore @@ -0,0 +1 @@ +/test_data diff --git a/cv_analysis/test/__init__.py b/test/__init__.py similarity index 100% rename from cv_analysis/test/__init__.py rename to test/__init__.py diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..06ab2f7 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,6 @@ +pytest_plugins = [ + "test.fixtures.pyinfra_compat", +] + +def pytest_make_parametrize_id(config, val, argname): + return f" {argname}={val} " diff --git a/test/fixtures/__init__.py b/test/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/fixtures/pyinfra_compat.py b/test/fixtures/pyinfra_compat.py new file mode 100644 index 0000000..238385f --- /dev/null +++ b/test/fixtures/pyinfra_compat.py @@ -0,0 +1,61 @@ +import gzip +import io + +import numpy as np +import pytest +from PIL import Image +from funcy import first + +from cv_analysis.pyinfra_compat import get_analysis_fn +from cv_analysis.utils.preprocessing import open_img_from_bytes +from cv_analysis.utils.structures import Rectangle +from incl.pyinfra.pyinfra.server.packing import bytes_to_string, string_to_bytes + + +@pytest.fixture +def random_image(): + return np.random.rand(100, 100, 3) * 255 + + +@pytest.fixture +def random_image_as_bytes_and_compressed(random_image): + image = Image.fromarray(random_image.astype("uint8")).convert("RGBA") + img_byte_arr = io.BytesIO() + image.save(img_byte_arr, format="PNG") + return gzip.compress(img_byte_arr.getvalue()) + + +@pytest.fixture +def random_image_metadata_package(random_image_as_bytes_and_compressed): + data = bytes_to_string(random_image_as_bytes_and_compressed) + return [{"data": data, "metadata": {"key": "value", "key2": "value2"}}] + + +@pytest.fixture +def expected_analyse_metadata(operation, random_image_metadata_package): + metadata = first(random_image_metadata_package) + image = open_img_from_bytes(gzip.decompress(string_to_bytes(metadata["data"]))) + wrapped_metadata = { + **metadata["metadata"], + "pageWidth": image.shape[1], + "pageHeight": image.shape[0], + } + + if operation == "mock": + return { + **wrapped_metadata, + "cells": [{"x": 0, "y": 0, "width": image.shape[1], "height": image.shape[0]}], + } + if operation == "table_parsing": + return {} + + +@pytest.fixture +def analyse_fn(operation): + if operation == "mock": + + def analyse_mock(image: np.ndarray): + return [Rectangle.from_xywh((0, 0, image.shape[1], image.shape[0]))] + + return analyse_mock + return get_analysis_fn(operation) diff --git a/test/test_data.dvc b/test/test_data.dvc new file mode 100644 index 0000000..9ee9530 --- /dev/null +++ b/test/test_data.dvc @@ -0,0 +1,5 @@ +outs: +- md5: f74c866991f90b519dd334980ce0d495.dir + size: 2832497 + nfiles: 21 + path: test_data diff --git a/cv_analysis/test/unit_tests/config_test.py b/test/unit_tests/config_test.py similarity index 52% rename from cv_analysis/test/unit_tests/config_test.py rename to test/unit_tests/config_test.py index 678b4b7..049d4f7 100644 --- a/cv_analysis/test/unit_tests/config_test.py +++ b/test/unit_tests/config_test.py @@ -2,4 +2,5 @@ from cv_analysis.config import CONFIG def test_config(): - assert CONFIG.test_dummy == "test_dummy" + assert CONFIG.service + assert CONFIG.webserver diff --git a/test/unit_tests/pyinfra_compat_test.py b/test/unit_tests/pyinfra_compat_test.py new file mode 100644 index 0000000..29cf0c4 --- /dev/null +++ b/test/unit_tests/pyinfra_compat_test.py @@ -0,0 +1,31 @@ +import pytest +from funcy import first + +from cv_analysis.figure_detection import detect_figures +from cv_analysis.layout_parsing import parse_layout +from cv_analysis.pyinfra_compat import get_analysis_fn, make_streamable_analysis_fn +from cv_analysis.redaction_detection import find_redactions +from cv_analysis.table_parsing import parse_tables + + +@pytest.mark.parametrize( + "analysis_fn_name,analysis_fn", + [ + ("table_parsing", parse_tables), + ("layout_parsing", parse_layout), + ("figure_detection", detect_figures), + ("redaction_detection", find_redactions), + ], +) +def test_get_analysis_fn(analysis_fn_name, analysis_fn): + fn = get_analysis_fn + assert fn(analysis_fn_name) == analysis_fn + + +@pytest.mark.parametrize("operation", ["mock", "table_parsing"]) +def test_make_analysis_fn(analyse_fn, random_image_metadata_package, expected_analyse_metadata): + analyse = make_streamable_analysis_fn(analyse_fn) + results = first(analyse(random_image_metadata_package)) + + assert results["metadata"] == expected_analyse_metadata + diff --git a/test/unit_tests/table_parsing_test.py b/test/unit_tests/table_parsing_test.py new file mode 100644 index 0000000..888aeec --- /dev/null +++ b/test/unit_tests/table_parsing_test.py @@ -0,0 +1,35 @@ +import json +from os.path import join + +import pytest +from funcy import first + +from cv_analysis.locations import TEST_DATA_DIR +from cv_analysis.table_parsing import parse_tables +from cv_analysis.utils.preprocessing import open_pdf +from cv_analysis.utils.test_metrics import compute_document_score + + +@pytest.mark.parametrize("score_threshold", [0.95]) +@pytest.mark.parametrize("test_file_index", range(1, 11)) +def test_table_parsing(score_threshold, image_with_tables, expected_table_annotation, test_file_index): + + result = [x.json_xywh() for x in parse_tables(image_with_tables)] + formatted_result = {"pages": [{"page": str(test_file_index), "cells": result}]} + + score = compute_document_score(formatted_result, expected_table_annotation) + + assert round(score, 3) >= score_threshold + + +@pytest.fixture +def image_with_tables(test_file_index): + img_path = join(TEST_DATA_DIR, f"test{test_file_index}.png") + return first(open_pdf(img_path)) + + +@pytest.fixture +def expected_table_annotation(test_file_index): + json_path = join(TEST_DATA_DIR, f"test{test_file_index}.json") + with open(json_path) as f: + return json.load(f)