From fc8a9e15f8e32b508c74632fb12fdba6e220447d Mon Sep 17 00:00:00 2001 From: Julius Unverfehrt Date: Thu, 30 Jun 2022 14:50:58 +0200 Subject: [PATCH] Pull request #12: Diff font sizes on page Merge in RR/cv-analysis from diff-font-sizes-on-page to master Squashed commit of the following: commit d1b32a3e8fadd45d38040e1ba96672ace240ae29 Author: Julius Unverfehrt Date: Thu Jun 30 14:43:30 2022 +0200 add tests for figure detection first iteration commit c38a7701afaad513320f157fe7188b3f11a682ac Author: Julius Unverfehrt Date: Thu Jun 30 14:26:08 2022 +0200 update text tests with new test cases commit ccc0c1a177c7d69c9575ec0267a492c3eef008e3 Author: llocarnini Date: Wed Jun 29 23:09:24 2022 +0200 added fixture for different scaled text on page and parameter for different font style commit 5f36a634caad2849e673de7d64abb5b6c3a6055f Author: Julius Unverfehrt Date: Tue Jun 28 17:03:52 2022 +0200 add pdf2pdf annotate script for figure detection commit 7438c170371e166e82ab19f9dfdf1bddd89b7bb3 Author: Julius Unverfehrt Date: Tue Jun 28 16:24:52 2022 +0200 optimize algorithm commit 93bf8820f856d3815bab36b13c0df189c45d01e0 Author: Julius Unverfehrt Date: Tue Jun 28 16:11:15 2022 +0200 black commit 59c639eec7d3f9da538b0ad6cd6215456c92eb58 Author: Julius Unverfehrt Date: Tue Jun 28 16:10:39 2022 +0200 add tests for figure detection pipeline commit bada688d88231843e9d299d255d9c4e0d5ca9788 Author: Julius Unverfehrt Date: Tue Jun 28 13:34:36 2022 +0200 refactor tests commit 614388a18b46d670527727c11f63e8174aed3736 Author: Julius Unverfehrt Date: Tue Jun 28 13:34:14 2022 +0200 introduce pipeline logic for figure detection commit 7195f892d543294829aebe80e260b4395b89cb36 Author: Julius Unverfehrt Date: Tue Jun 28 11:58:41 2022 +0200 update reqs commit 4408e7975853196c5e363dd2ddf62e15fe6f4944 Author: Julius Unverfehrt Date: Tue Jun 28 11:56:16 2022 +0200 add figure detection test commit 5ff472c2d96238ca2bc1d2368d3d02e62db98713 Author: Julius Unverfehrt Date: Tue Jun 28 11:56:09 2022 +0200 add figure detection test commit 66c1307e57c84789d64cb8e41d8e923ac98eebde Author: Julius Unverfehrt Date: Tue Jun 28 10:36:50 2022 +0200 refactor draw boxes to work as intended on inversed image commit 00a39050d051ae43b2a8f2c4efd6bfbd2609dead Author: Julius Unverfehrt Date: Tue Jun 28 10:36:11 2022 +0200 refactor module structure commit f8af01894c387468334a332e75f7dbf545a91f86 Author: Julius Unverfehrt Date: Mon Jun 27 17:07:47 2022 +0200 add: figure detection now agnostic to input image background color, refactor tests commit 3bc63da783bced571d53b29b6d82648c9f93e886 Author: Julius Unverfehrt Date: Mon Jun 27 14:31:15 2022 +0200 add text removal tests commit 6e794a7cee3fd7633aa5084839775877b0f8794c Author: Julius Unverfehrt Date: Mon Jun 27 12:12:27 2022 +0200 figure detection tests WIP commit f8b20d4c9845de6434142e3dab69ce467fbc7a75 Author: Julius Unverfehrt Date: Fri Jun 24 15:39:37 2022 +0200 add tests for figure_detection WIP commit f2a52a07a5e261962214dff40ba710c93993f6fb Author: llocarnini Date: Fri Jun 24 14:28:44 2022 +0200 added third test case "figure_and_text" commit 8f45c88278cdcd32a121ea8269c8eca816bffd0b Author: Julius Unverfehrt Date: Fri Jun 24 13:25:17 2022 +0200 add tests for figure_detection --- cv_analysis/figure_detection.py | 37 -------- cv_analysis/figure_detection/__init__.py | 0 .../figure_detection_pipeline.py | 37 ++++++++ .../figures.py} | 15 +-- cv_analysis/figure_detection/text.py | 51 ++++++++++ cv_analysis/server/__init__.py | 0 cv_analysis/{ => server}/pyinfra_compat.py | 13 --- cv_analysis/utils/filters.py | 4 + cv_analysis/utils/text.py | 64 ------------- requirements.txt | 3 +- scripts/annotate.py | 3 +- scripts/annotate_figures.py | 38 ++++++++ src/serve.py | 10 +- test/conftest.py | 4 +- test/fixtures/figure_detection.py | 93 +++++++++++++++++++ .../fixtures/{pyinfra_compat.py => server.py} | 17 +--- test/unit_tests/__init__.py | 0 test/unit_tests/figure_detection/__init__.py | 0 .../figure_detection_pipeline_test.py | 55 +++++++++++ test/unit_tests/figure_detection/text_test.py | 40 ++++++++ test/unit_tests/pyinfra_compat_test.py | 31 ------- test/unit_tests/server/__init__.py | 0 test/unit_tests/server/pyinfra_compat_test.py | 13 +++ test/utils/__init__.py | 0 test/utils/utils.py | 6 ++ 25 files changed, 362 insertions(+), 172 deletions(-) delete mode 100644 cv_analysis/figure_detection.py create mode 100644 cv_analysis/figure_detection/__init__.py create mode 100644 cv_analysis/figure_detection/figure_detection_pipeline.py rename cv_analysis/{utils/detection.py => figure_detection/figures.py} (62%) create mode 100644 cv_analysis/figure_detection/text.py create mode 100644 cv_analysis/server/__init__.py rename cv_analysis/{ => server}/pyinfra_compat.py (72%) delete mode 100644 cv_analysis/utils/text.py create mode 100644 scripts/annotate_figures.py create mode 100644 test/fixtures/figure_detection.py rename test/fixtures/{pyinfra_compat.py => server.py} (78%) create mode 100644 test/unit_tests/__init__.py create mode 100644 test/unit_tests/figure_detection/__init__.py create mode 100644 test/unit_tests/figure_detection/figure_detection_pipeline_test.py create mode 100644 test/unit_tests/figure_detection/text_test.py delete mode 100644 test/unit_tests/pyinfra_compat_test.py create mode 100644 test/unit_tests/server/__init__.py create mode 100644 test/unit_tests/server/pyinfra_compat_test.py create mode 100644 test/utils/__init__.py create mode 100644 test/utils/utils.py diff --git a/cv_analysis/figure_detection.py b/cv_analysis/figure_detection.py deleted file mode 100644 index c2f7878..0000000 --- a/cv_analysis/figure_detection.py +++ /dev/null @@ -1,37 +0,0 @@ -import cv2 -import numpy as np -from pdf2image import pdf2image - -# import pandas as pd -from PIL import Image -import timeit -from os import path -from cv_analysis.locations import METADATA_TESTFILES, PNG_FOR_TESTING, PNG_FIGURES_DETECTED -from cv_analysis.utils.detection import detect_large_coherent_structures -from cv_analysis.utils.display import show_mpl -from cv_analysis.utils.draw import draw_rectangles -from cv_analysis.utils.post_processing import remove_included -from cv_analysis.utils.filters import is_large_enough, has_acceptable_format -from cv_analysis.utils.structures import Rectangle -from cv_analysis.utils.text import remove_primary_text_regions -from cv_analysis.utils.visual_logging import vizlogger - - -def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6): - return is_large_enough(cont, min_area) and has_acceptable_format(cont, max_width_to_hight_ratio) - - -def detect_figures(image: np.array): - - image = image.copy() - vizlogger.debug(image, "figures01_start.png") - - image = remove_primary_text_regions(image) - vizlogger.debug(image, "figures02_remove_text.png") - cnts = detect_large_coherent_structures(image) - - cnts = filter(is_likely_figure, cnts) - rects = map(cv2.boundingRect, cnts) - rects = remove_included(rects) - - return list(map(Rectangle.from_xywh, rects)) diff --git a/cv_analysis/figure_detection/__init__.py b/cv_analysis/figure_detection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cv_analysis/figure_detection/figure_detection_pipeline.py b/cv_analysis/figure_detection/figure_detection_pipeline.py new file mode 100644 index 0000000..9a98be7 --- /dev/null +++ b/cv_analysis/figure_detection/figure_detection_pipeline.py @@ -0,0 +1,37 @@ +from functools import partial + +import cv2 +import numpy as np + +from cv_analysis.figure_detection.figures import detect_large_coherent_structures +from cv_analysis.figure_detection.text import remove_primary_text_regions +from cv_analysis.utils.filters import is_large_enough, has_acceptable_format, is_not_too_large +from cv_analysis.utils.post_processing import remove_included +from cv_analysis.utils.structures import Rectangle + + +def make_figure_detection_pipeline(min_area=5000, max_width_to_height_ratio=6): + def pipeline(image: np.array): + max_area = image.shape[0] * image.shape[1] * 0.99 + filter_cnts = make_filter_likely_figures(min_area, max_area, max_width_to_height_ratio) + + image = remove_primary_text_regions(image) + cnts = detect_large_coherent_structures(image) + cnts = filter_cnts(cnts) + + rects = remove_included(map(cv2.boundingRect, cnts)) + rectangles = map(Rectangle.from_xywh, rects) + return rectangles + + return pipeline + + +def make_filter_likely_figures(min_area, max_area, max_width_to_height_ratio): + def is_likely_figure(cnts): + return ( + is_not_too_large(cnts, max_area) + and is_large_enough(cnts, min_area) + and has_acceptable_format(cnts, max_width_to_height_ratio) + ) + + return partial(filter, is_likely_figure) diff --git a/cv_analysis/utils/detection.py b/cv_analysis/figure_detection/figures.py similarity index 62% rename from cv_analysis/utils/detection.py rename to cv_analysis/figure_detection/figures.py index be86b27..18a5d16 100644 --- a/cv_analysis/utils/detection.py +++ b/cv_analysis/figure_detection/figures.py @@ -4,21 +4,22 @@ import numpy as np def detect_large_coherent_structures(image: np.array): """Detects large coherent structures on an image. + Expects an image with binary color space (e.g. threshold applied). + + Returns: + contours References: https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection """ - if len(image.shape) > 2: - image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - - image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY)[1] + assert len(image.shape) == 2 dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5)) - dilate = cv2.dilate(~image, dilate_kernel, iterations=4) + dilate = cv2.dilate(image, dilate_kernel, iterations=4) close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20)) close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1) - counts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - return counts + return cnts diff --git a/cv_analysis/figure_detection/text.py b/cv_analysis/figure_detection/text.py new file mode 100644 index 0000000..11fd832 --- /dev/null +++ b/cv_analysis/figure_detection/text.py @@ -0,0 +1,51 @@ +import cv2 + + +def remove_primary_text_regions(image): + """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs. + + Args: + image: Image to remove primary text from. + + Returns: + Image with primary text removed. + + References: + https://stackoverflow.com/questions/58349726/opencv-how-to-remove-text-from-background + """ + + image = apply_threshold_to_image(image) + + threshold_image = image.copy() + + close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 7)) # 20,3 + close = cv2.morphologyEx(image, cv2.MORPH_CLOSE, close_kernel, iterations=1) + + dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 3)) # 5,3 + dilate = cv2.dilate(close, dilate_kernel, iterations=1) + + cnts, _ = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + cnts = filter(is_likely_primary_text_segment, cnts) + + rects = map(cv2.boundingRect, cnts) + + image = draw_bboxes(threshold_image, rects) + return image + + +def apply_threshold_to_image(image): + """Converts an image to black and white.""" + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image + return cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] + + +def is_likely_primary_text_segment(cnt): + x, y, w, h = cv2.boundingRect(cnt) + return 400 < cv2.contourArea(cnt) < 16000 or w / h > 3 + + +def draw_bboxes(image, bboxes): + for rect in bboxes: + x, y, w, h = rect + cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1) + return image diff --git a/cv_analysis/server/__init__.py b/cv_analysis/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cv_analysis/pyinfra_compat.py b/cv_analysis/server/pyinfra_compat.py similarity index 72% rename from cv_analysis/pyinfra_compat.py rename to cv_analysis/server/pyinfra_compat.py index 560515a..e9e8b7b 100644 --- a/cv_analysis/pyinfra_compat.py +++ b/cv_analysis/server/pyinfra_compat.py @@ -4,10 +4,6 @@ from typing import Callable from funcy import lmap from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic -from cv_analysis.figure_detection import detect_figures -from cv_analysis.layout_parsing import parse_layout -from cv_analysis.redaction_detection import find_redactions -from cv_analysis.table_parsing import parse_tables from cv_analysis.utils.preprocessing import open_img_from_bytes @@ -38,12 +34,3 @@ def make_streamable_analysis_fn(analysis_fn: Callable): return b"", results_metadata return make_streamable_and_wrap_in_packing_logic(analyse, batched=False) - - -def get_analysis_fn(operation): - return { - "table_parsing": parse_tables, - "layout_parsing": parse_layout, - "figure_detection": detect_figures, - "redaction_detection": find_redactions, - }[operation] diff --git a/cv_analysis/utils/filters.py b/cv_analysis/utils/filters.py index 274925c..1f6bdd2 100644 --- a/cv_analysis/utils/filters.py +++ b/cv_analysis/utils/filters.py @@ -5,6 +5,10 @@ def is_large_enough(cont, min_area): return cv2.contourArea(cont, False) > min_area +def is_not_too_large(cnt, max_area): + return cv2.contourArea(cnt, False) < max_area + + def has_acceptable_format(cont, max_width_to_height_ratio): _, _, w, h = cv2.boundingRect(cont) return max_width_to_height_ratio >= w / h >= (1 / max_width_to_height_ratio) diff --git a/cv_analysis/utils/text.py b/cv_analysis/utils/text.py deleted file mode 100644 index 8be13fe..0000000 --- a/cv_analysis/utils/text.py +++ /dev/null @@ -1,64 +0,0 @@ -import cv2 -import numpy as np - -from cv_analysis.utils.display import show_mpl - - -def remove_primary_text_regions(image): - """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs. - - Args: - image: Image to remove primary text from. - - Returns: - Image with primary text removed. - """ - - image = image.copy() - - cnts = find_primary_text_regions(image) - for cnt in cnts: - x, y, w, h = cv2.boundingRect(cnt) - cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1) - # show_mpl(image) - return image - - -def find_primary_text_regions(image): - """Finds regions of primary text, meaning no figure descriptions for example, but main text body paragraphs. - - Args: - image: Image to remove primary text from. - - Returns: - Image with primary text removed. - - References: - https://stackoverflow.com/questions/58349726/opencv-how-to-remove-text-from-background - """ - - def is_likely_primary_text_segments(cnt): - x, y, w, h = cv2.boundingRect(cnt) - return 800 < cv2.contourArea(cnt) < 16000 or w / h > 3 - - image = image.copy() - - if len(image.shape) > 2: - image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - - image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] - - close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 7)) # 20,3 - close = cv2.morphologyEx(image, cv2.MORPH_CLOSE, close_kernel, iterations=1) - - # show_mpl(close) - - dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 3)) # 5,3 - dilate = cv2.dilate(close, dilate_kernel, iterations=1) - - # show_mpl(dilate) - - cnts, _ = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) - cnts = filter(is_likely_primary_text_segments, cnts) - - return cnts diff --git a/requirements.txt b/requirements.txt index e0a8a64..dc2ae9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,5 @@ scipy~=1.8.0 coverage~=5.5 dependency-check~=0.6.0 prometheus-client~=0.13.1 -prometheus_flask_exporter~=0.19.0 \ No newline at end of file +prometheus_flask_exporter~=0.19.0 +lorem-text==2.1 \ No newline at end of file diff --git a/scripts/annotate.py b/scripts/annotate.py index 0593ab9..cac9b45 100644 --- a/scripts/annotate.py +++ b/scripts/annotate.py @@ -45,5 +45,6 @@ if __name__ == "__main__": elif args.type == "layout": from cv_analysis.layout_parsing import parse_layout as analyze elif args.type == "figure": - from cv_analysis.figure_detection import detect_figures as analyze + from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline + analyze = make_figure_detection_pipeline() annotate_page(page, analyze, draw, name=name, show=args.show) diff --git a/scripts/annotate_figures.py b/scripts/annotate_figures.py new file mode 100644 index 0000000..cdc72f4 --- /dev/null +++ b/scripts/annotate_figures.py @@ -0,0 +1,38 @@ +import argparse +from itertools import starmap +from pathlib import Path + +import numpy as np +import pdf2image +from PIL import Image +from funcy import lmap + +from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline +from cv_analysis.utils.draw import draw_rectangles + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--pdf_path", "-p", required=True) + parser.add_argument("--output_folder", "-o", required=True) + return parser.parse_args() + + +def annotate_figures(images): + pipeline = make_figure_detection_pipeline() + result = map(pipeline, images) + annotated_images = starmap(draw_rectangles, zip(images, result)) + return annotated_images + + +def save_as_pdf(images, output_folder, file_name): + Path(output_folder).mkdir(parents=True, exist_ok=True) + images = lmap(Image.fromarray, images) + images[0].save(f"{output_folder}/{file_name}_annotated_figures.pdf", save_all=True, append_images=images) + + +if __name__ == "__main__": + args = parse_args() + pages = lmap(np.array, pdf2image.convert_from_path(args.pdf_path)) + annotated_pages = annotate_figures(images=pages) + save_as_pdf(annotated_pages, args.output_folder, Path(args.pdf_path).stem) diff --git a/src/serve.py b/src/serve.py index c16acf1..a5481e9 100644 --- a/src/serve.py +++ b/src/serve.py @@ -3,7 +3,9 @@ import logging from waitress import serve from cv_analysis.config import CONFIG -from cv_analysis.pyinfra_compat import make_streamable_analysis_fn, get_analysis_fn +from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline +from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn +from cv_analysis.table_parsing import parse_tables from cv_analysis.utils.banner import make_art from cv_analysis.utils.logging import get_logger from incl.pyinfra.pyinfra.server.server import set_up_processing_server @@ -12,10 +14,10 @@ from incl.pyinfra.pyinfra.server.server import set_up_processing_server def main(): logger.info(make_art()) - operations = ["table_parsing"] - operation2function = {op: make_streamable_analysis_fn(get_analysis_fn(op)) for op in operations} + operation2function = {"table_parsing": parse_tables, "figure_detection": make_figure_detection_pipeline()} + operation2streamable_function = {op: make_streamable_analysis_fn(fn) for op, fn in operation2function.items()} - server = set_up_processing_server(operation2function) + server = set_up_processing_server(operation2streamable_function) host = CONFIG.webserver.host port = CONFIG.webserver.port diff --git a/test/conftest.py b/test/conftest.py index 06ab2f7..6760193 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,6 +1,8 @@ pytest_plugins = [ - "test.fixtures.pyinfra_compat", + "test.fixtures.server", + "test.fixtures.figure_detection", ] + def pytest_make_parametrize_id(config, val, argname): return f" {argname}={val} " diff --git a/test/fixtures/figure_detection.py b/test/fixtures/figure_detection.py new file mode 100644 index 0000000..83aefa4 --- /dev/null +++ b/test/fixtures/figure_detection.py @@ -0,0 +1,93 @@ +import textwrap + +import cv2 +import numpy as np +import pytest +from PIL import Image +from lorem_text import lorem +from funcy import first +from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline +from cv_analysis.utils.display import show_mpl + + +@pytest.fixture +def page_with_images(random_image, n_images, background): + page_image = Image.fromarray(background.astype("uint8")).convert("RGB") + page_image = paste_image(page_image, random_image, (200, 200)) + if n_images == 2: + page_image = paste_image(page_image, random_image, (1000, 2600)) + return np.array(page_image) + + +@pytest.fixture +def page_with_text(background, font_scale, font_style, text_types): + cursor = (50, 50) + image = background + body_height = image.shape[0] // 3 + if "header" in text_types: + image = paste_text(image, cursor, font_scale, font_style, y_stop=70) + if "body" in text_types: + cursor = (image.shape[1] // 2, 70) + image = paste_text(image, cursor, font_scale, font_style, y_stop=body_height) + cursor = (50, body_height+70) + image = paste_text(image, cursor, font_scale, font_style, y_stop=body_height*2) + if "caption" in text_types: + cursor = (image.shape[1] // 2, image.shape[0] - 100) + image = paste_text(image, cursor, font_scale, font_style, y_stop=body_height*3) + return image + + +@pytest.fixture +def page_with_images_and_text(page_with_images, page_with_text): + return np.fmin(page_with_text, page_with_images) + + +@pytest.fixture +def background(background_color): + return np.ones((3508, 2480, 3), dtype="uint8") * background_color + + +@pytest.fixture +def random_image(image_size): + return np.random.rand(*image_size, 3) * 255 + + +@pytest.fixture +def figure_detection_pipeline(): + return make_figure_detection_pipeline() + + +def paste_text(image: np.ndarray, cursor, font_scale, font_style, y_stop): + def paste_text_at_cursor(x_start, y_start, y_stop): + # TODO: adjust incorrect right margin + text = lorem.paragraphs(1) * 200 + (dx, dy), base = cv2.getTextSize(text, fontFace=font_style, fontScale=font_scale, thickness=1) + dy += base + # char_width = dx // len(text) + text = textwrap.fill(text=text, width=(dx // page_width)) + for i, line in enumerate(text.split("\n")): + y = y_start + i * dy + if y > y_stop: + break + cv2.putText( + image, + line, + org=(x_start, y), + fontFace=font_style, + fontScale=font_scale, + color=(0, 0, 0), + thickness=1, + lineType=cv2.LINE_AA, + ) + + x_start, y_start = cursor + page_width = image.shape[1] + paste_text_at_cursor(x_start, y_start, y_stop) + return image + + +def paste_image(page_image, image, coords): + image = Image.fromarray(image.astype("uint8")).convert("RGBA") + page_image.paste(image, coords) + return page_image + diff --git a/test/fixtures/pyinfra_compat.py b/test/fixtures/server.py similarity index 78% rename from test/fixtures/pyinfra_compat.py rename to test/fixtures/server.py index 238385f..86aa1c1 100644 --- a/test/fixtures/pyinfra_compat.py +++ b/test/fixtures/server.py @@ -6,17 +6,11 @@ import pytest from PIL import Image from funcy import first -from cv_analysis.pyinfra_compat import get_analysis_fn from cv_analysis.utils.preprocessing import open_img_from_bytes from cv_analysis.utils.structures import Rectangle from incl.pyinfra.pyinfra.server.packing import bytes_to_string, string_to_bytes -@pytest.fixture -def random_image(): - return np.random.rand(100, 100, 3) * 255 - - @pytest.fixture def random_image_as_bytes_and_compressed(random_image): image = Image.fromarray(random_image.astype("uint8")).convert("RGBA") @@ -51,11 +45,8 @@ def expected_analyse_metadata(operation, random_image_metadata_package): @pytest.fixture -def analyse_fn(operation): - if operation == "mock": +def analysis_fn_mock(operation): + def analyse_mock(image: np.ndarray): + return [Rectangle.from_xywh((0, 0, image.shape[1], image.shape[0]))] - def analyse_mock(image: np.ndarray): - return [Rectangle.from_xywh((0, 0, image.shape[1], image.shape[0]))] - - return analyse_mock - return get_analysis_fn(operation) + return analyse_mock diff --git a/test/unit_tests/__init__.py b/test/unit_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/unit_tests/figure_detection/__init__.py b/test/unit_tests/figure_detection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/unit_tests/figure_detection/figure_detection_pipeline_test.py b/test/unit_tests/figure_detection/figure_detection_pipeline_test.py new file mode 100644 index 0000000..57b5526 --- /dev/null +++ b/test/unit_tests/figure_detection/figure_detection_pipeline_test.py @@ -0,0 +1,55 @@ +from math import prod + +import cv2 +import pytest + +from cv_analysis.utils.display import show_mpl +from cv_analysis.utils.draw import draw_rectangles +from test.utils.utils import powerset + + +@pytest.mark.parametrize("background_color", [255, 220]) +class TestFindPrimaryTextRegions: + def test_blank_page_yields_no_figures(self, figure_detection_pipeline, background): + results = figure_detection_pipeline(background) + + assert not list(results) + + @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) + @pytest.mark.parametrize("n_images", [1, 2]) + def test_page_without_text_yields_figures(self, figure_detection_pipeline, page_with_images, image_size): + results = figure_detection_pipeline(page_with_images) + result_figures_size = map(lambda x: (x.w, x.h), results) + + assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figures_size]) + + @pytest.mark.parametrize("font_scale", [1, 1.5, 2]) + @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX]) + @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"])) + @pytest.mark.parametrize("error_tolerance", [0.025]) + def test_page_with_only_text_yields_no_figures(self, figure_detection_pipeline, page_with_text, error_tolerance): + results = figure_detection_pipeline(page_with_text) + + result_figures_area = sum(map(lambda x: (x.w * x.h), results)) + page_area = prod(page_with_text.shape) + error = result_figures_area / page_area + + assert error <= error_tolerance + + @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) + @pytest.mark.parametrize("n_images", [1, 2]) + @pytest.mark.parametrize("font_scale", [1, 1.5, 2]) + @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX]) + @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"])) + @pytest.mark.parametrize("error_tolerance", [0.9]) + def test_page_with_images_and_text_yields_only_figures( + self, figure_detection_pipeline, page_with_images_and_text, image_size, n_images, error_tolerance + ): + results = list(figure_detection_pipeline(page_with_images_and_text)) + + result_figures_area = sum(map(lambda x: (x.w * x.h), results)) + expected_figure_area = n_images * prod(image_size) + + error = abs(result_figures_area - expected_figure_area) / expected_figure_area + + assert error <= error_tolerance diff --git a/test/unit_tests/figure_detection/text_test.py b/test/unit_tests/figure_detection/text_test.py new file mode 100644 index 0000000..ce04285 --- /dev/null +++ b/test/unit_tests/figure_detection/text_test.py @@ -0,0 +1,40 @@ +import cv2 +import numpy as np +import pytest + +from cv_analysis.figure_detection.text import remove_primary_text_regions, apply_threshold_to_image +from cv_analysis.utils.display import show_mpl +from test.utils.utils import powerset + + +@pytest.mark.parametrize("error_tolerance", [0.07]) +@pytest.mark.parametrize("background_color", [255, 220]) +class TestFindPrimaryTextRegions: + def test_blank_page_stays_blank(self, background, error_tolerance): + result_page = remove_primary_text_regions(background) + + np.testing.assert_equal(result_page, apply_threshold_to_image(background)) + + @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) + @pytest.mark.parametrize("n_images", [1, 2]) + def test_page_without_text_keeps_images(self, page_with_images, error_tolerance): + result_page = remove_primary_text_regions(page_with_images) + np.testing.assert_equal(result_page, apply_threshold_to_image(page_with_images)) + + @pytest.mark.parametrize("font_scale", [1, 1.5, 2]) + @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX]) + @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"])) + def test_page_with_only_text_gets_text_removed(self, page_with_text, error_tolerance): + result_page = remove_primary_text_regions(page_with_text) + relative_error = np.sum(result_page != apply_threshold_to_image(page_with_text)) / result_page.size + assert relative_error <= error_tolerance + + @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) + @pytest.mark.parametrize("n_images", [1, 2]) + @pytest.mark.parametrize("font_scale", [1, 1.5, 2]) + @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX]) + @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"])) + def test_page_with_images_and_text_keeps_images(self, page_with_images_and_text, error_tolerance): + result_page = remove_primary_text_regions(page_with_images_and_text) + relative_error = np.sum(result_page != apply_threshold_to_image(page_with_images_and_text)) / result_page.size + assert relative_error <= error_tolerance diff --git a/test/unit_tests/pyinfra_compat_test.py b/test/unit_tests/pyinfra_compat_test.py deleted file mode 100644 index 29cf0c4..0000000 --- a/test/unit_tests/pyinfra_compat_test.py +++ /dev/null @@ -1,31 +0,0 @@ -import pytest -from funcy import first - -from cv_analysis.figure_detection import detect_figures -from cv_analysis.layout_parsing import parse_layout -from cv_analysis.pyinfra_compat import get_analysis_fn, make_streamable_analysis_fn -from cv_analysis.redaction_detection import find_redactions -from cv_analysis.table_parsing import parse_tables - - -@pytest.mark.parametrize( - "analysis_fn_name,analysis_fn", - [ - ("table_parsing", parse_tables), - ("layout_parsing", parse_layout), - ("figure_detection", detect_figures), - ("redaction_detection", find_redactions), - ], -) -def test_get_analysis_fn(analysis_fn_name, analysis_fn): - fn = get_analysis_fn - assert fn(analysis_fn_name) == analysis_fn - - -@pytest.mark.parametrize("operation", ["mock", "table_parsing"]) -def test_make_analysis_fn(analyse_fn, random_image_metadata_package, expected_analyse_metadata): - analyse = make_streamable_analysis_fn(analyse_fn) - results = first(analyse(random_image_metadata_package)) - - assert results["metadata"] == expected_analyse_metadata - diff --git a/test/unit_tests/server/__init__.py b/test/unit_tests/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/unit_tests/server/pyinfra_compat_test.py b/test/unit_tests/server/pyinfra_compat_test.py new file mode 100644 index 0000000..d2f0690 --- /dev/null +++ b/test/unit_tests/server/pyinfra_compat_test.py @@ -0,0 +1,13 @@ +import pytest +from funcy import first + +from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn + + +@pytest.mark.parametrize("operation", ["mock"]) +@pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) +def test_make_analysis_fn(analysis_fn_mock, random_image_metadata_package, expected_analyse_metadata): + analyse = make_streamable_analysis_fn(analysis_fn_mock) + results = first(analyse(random_image_metadata_package)) + + assert results["metadata"] == expected_analyse_metadata diff --git a/test/utils/__init__.py b/test/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/utils.py b/test/utils/utils.py new file mode 100644 index 0000000..50bc13b --- /dev/null +++ b/test/utils/utils.py @@ -0,0 +1,6 @@ +from itertools import chain, combinations + + +def powerset(iterable): + s = list(iterable) + return list(chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1)))