Pull request #12: Diff font sizes on page
Merge in RR/cv-analysis from diff-font-sizes-on-page to master
Squashed commit of the following:
commit d1b32a3e8fadd45d38040e1ba96672ace240ae29
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Jun 30 14:43:30 2022 +0200
add tests for figure detection first iteration
commit c38a7701afaad513320f157fe7188b3f11a682ac
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Jun 30 14:26:08 2022 +0200
update text tests with new test cases
commit ccc0c1a177c7d69c9575ec0267a492c3eef008e3
Author: llocarnini <lillian.locarnini@iqser.com>
Date: Wed Jun 29 23:09:24 2022 +0200
added fixture for differently scaled text on page and a parameter for different font styles
commit 5f36a634caad2849e673de7d64abb5b6c3a6055f
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 17:03:52 2022 +0200
add pdf2pdf annotate script for figure detection
commit 7438c170371e166e82ab19f9dfdf1bddd89b7bb3
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 16:24:52 2022 +0200
optimize algorithm
commit 93bf8820f856d3815bab36b13c0df189c45d01e0
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 16:11:15 2022 +0200
black
commit 59c639eec7d3f9da538b0ad6cd6215456c92eb58
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 16:10:39 2022 +0200
add tests for figure detection pipeline
commit bada688d88231843e9d299d255d9c4e0d5ca9788
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 13:34:36 2022 +0200
refactor tests
commit 614388a18b46d670527727c11f63e8174aed3736
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 13:34:14 2022 +0200
introduce pipeline logic for figure detection
commit 7195f892d543294829aebe80e260b4395b89cb36
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 11:58:41 2022 +0200
update reqs
commit 4408e7975853196c5e363dd2ddf62e15fe6f4944
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 11:56:16 2022 +0200
add figure detection test
commit 5ff472c2d96238ca2bc1d2368d3d02e62db98713
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 11:56:09 2022 +0200
add figure detection test
commit 66c1307e57c84789d64cb8e41d8e923ac98eebde
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 10:36:50 2022 +0200
refactor draw boxes to work as intended on inverted image
commit 00a39050d051ae43b2a8f2c4efd6bfbd2609dead
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Jun 28 10:36:11 2022 +0200
refactor module structure
commit f8af01894c387468334a332e75f7dbf545a91f86
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Jun 27 17:07:47 2022 +0200
add: figure detection now agnostic to input image background color, refactor tests
commit 3bc63da783bced571d53b29b6d82648c9f93e886
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Jun 27 14:31:15 2022 +0200
add text removal tests
commit 6e794a7cee3fd7633aa5084839775877b0f8794c
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Jun 27 12:12:27 2022 +0200
figure detection tests WIP
commit f8b20d4c9845de6434142e3dab69ce467fbc7a75
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Jun 24 15:39:37 2022 +0200
add tests for figure_detection WIP
commit f2a52a07a5e261962214dff40ba710c93993f6fb
Author: llocarnini <lillian.locarnini@iqser.com>
Date: Fri Jun 24 14:28:44 2022 +0200
added third test case "figure_and_text"
commit 8f45c88278cdcd32a121ea8269c8eca816bffd0b
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Jun 24 13:25:17 2022 +0200
add tests for figure_detection
This commit is contained in:
parent
3ae4d81bb9
commit
fc8a9e15f8
@ -1,37 +0,0 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from pdf2image import pdf2image
|
||||
|
||||
# import pandas as pd
|
||||
from PIL import Image
|
||||
import timeit
|
||||
from os import path
|
||||
from cv_analysis.locations import METADATA_TESTFILES, PNG_FOR_TESTING, PNG_FIGURES_DETECTED
|
||||
from cv_analysis.utils.detection import detect_large_coherent_structures
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from cv_analysis.utils.post_processing import remove_included
|
||||
from cv_analysis.utils.filters import is_large_enough, has_acceptable_format
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
from cv_analysis.utils.text import remove_primary_text_regions
|
||||
from cv_analysis.utils.visual_logging import vizlogger
|
||||
|
||||
|
||||
def is_likely_figure(cont, min_area=5000, max_width_to_hight_ratio=6):
|
||||
return is_large_enough(cont, min_area) and has_acceptable_format(cont, max_width_to_hight_ratio)
|
||||
|
||||
|
||||
def detect_figures(image: np.array):
|
||||
|
||||
image = image.copy()
|
||||
vizlogger.debug(image, "figures01_start.png")
|
||||
|
||||
image = remove_primary_text_regions(image)
|
||||
vizlogger.debug(image, "figures02_remove_text.png")
|
||||
cnts = detect_large_coherent_structures(image)
|
||||
|
||||
cnts = filter(is_likely_figure, cnts)
|
||||
rects = map(cv2.boundingRect, cnts)
|
||||
rects = remove_included(rects)
|
||||
|
||||
return list(map(Rectangle.from_xywh, rects))
|
||||
0
cv_analysis/figure_detection/__init__.py
Normal file
0
cv_analysis/figure_detection/__init__.py
Normal file
37
cv_analysis/figure_detection/figure_detection_pipeline.py
Normal file
37
cv_analysis/figure_detection/figure_detection_pipeline.py
Normal file
@ -0,0 +1,37 @@
|
||||
from functools import partial
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from cv_analysis.figure_detection.figures import detect_large_coherent_structures
|
||||
from cv_analysis.figure_detection.text import remove_primary_text_regions
|
||||
from cv_analysis.utils.filters import is_large_enough, has_acceptable_format, is_not_too_large
|
||||
from cv_analysis.utils.post_processing import remove_included
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
|
||||
|
||||
def make_figure_detection_pipeline(min_area=5000, max_width_to_height_ratio=6):
    """Build a callable that detects figures on a page image.

    Args:
        min_area: Contours whose area is not larger than this are discarded.
        max_width_to_height_ratio: Largest accepted width/height ratio of a
            contour's bounding box; its reciprocal is the smallest accepted one.

    Returns:
        A function taking a page image and returning a list of ``Rectangle``
        objects, one per detected figure.
    """

    def pipeline(image: np.array):
        # Contours covering 99% or more of the page area are rejected.
        max_area = image.shape[0] * image.shape[1] * 0.99
        filter_cnts = make_filter_likely_figures(min_area, max_area, max_width_to_height_ratio)

        image = remove_primary_text_regions(image)
        cnts = detect_large_coherent_structures(image)
        cnts = filter_cnts(cnts)

        rects = remove_included(map(cv2.boundingRect, cnts))
        # Materialise the result: a bare ``map`` can only be consumed once,
        # which silently yields nothing on a second iteration.
        return list(map(Rectangle.from_xywh, rects))

    return pipeline
|
||||
|
||||
|
||||
def make_filter_likely_figures(min_area, max_area, max_width_to_height_ratio):
    """Build a filter that keeps only contours that plausibly outline a figure.

    Args:
        min_area: Exclusive lower bound for the contour area.
        max_area: Exclusive upper bound for the contour area.
        max_width_to_height_ratio: Bound for the bounding-box aspect ratio
            (and, via its reciprocal, for the inverse ratio).

    Returns:
        A callable that takes an iterable of contours and returns a lazy
        ``filter`` iterator over the accepted ones.
    """

    def is_likely_figure(cnt):
        # Receives a single contour, not a collection of them.
        return (
            is_not_too_large(cnt, max_area)
            and is_large_enough(cnt, min_area)
            and has_acceptable_format(cnt, max_width_to_height_ratio)
        )

    return partial(filter, is_likely_figure)
|
||||
@ -4,21 +4,22 @@ import numpy as np
|
||||
|
||||
def detect_large_coherent_structures(image: np.array):
    """Detects large coherent structures on an image.
    Expects an image with binary color space (e.g. threshold applied).

    Returns:
        contours

    Raises:
        ValueError: If the image is not two-dimensional (single channel).

    References:
        https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
    """
    # Explicit check instead of ``assert`` so it also runs under ``python -O``.
    if len(image.shape) != 2:
        raise ValueError(f"expected a single-channel image, got shape {image.shape}")

    # The kernel shape was previously given as cv2.MORPH_OPEN, which is an
    # operation code rather than a shape; it shares its numeric value with
    # cv2.MORPH_ELLIPSE, so the resulting kernel is unchanged.
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    dilate = cv2.dilate(image, dilate_kernel, iterations=4)

    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)

    cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return cnts
|
||||
51
cv_analysis/figure_detection/text.py
Normal file
51
cv_analysis/figure_detection/text.py
Normal file
@ -0,0 +1,51 @@
|
||||
import cv2
|
||||
|
||||
|
||||
def remove_primary_text_regions(image):
    """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs.

    Args:
        image: Image to remove primary text from.

    Returns:
        The thresholded image with the bounding boxes of likely primary text
        segments filled with black.

    References:
        https://stackoverflow.com/questions/58349726/opencv-how-to-remove-text-from-background
    """
    binary = apply_threshold_to_image(image)

    # Close, then dilate, before looking for contours.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 7))  # 20,3
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel, iterations=1)

    dilation_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 3))  # 5,3
    dilated = cv2.dilate(closed, dilation_kernel, iterations=1)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    text_boxes = (cv2.boundingRect(cnt) for cnt in contours if is_likely_primary_text_segment(cnt))

    # Draw on a copy of the thresholded image so ``binary`` itself stays untouched.
    return draw_bboxes(binary.copy(), text_boxes)
|
||||
|
||||
|
||||
def apply_threshold_to_image(image):
    """Converts an image to black and white (inverted binary, Otsu threshold)."""
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # NOTE: with THRESH_OTSU set, OpenCV picks the threshold itself; the 253
    # passed here is not used as the cut-off.
    _, binary = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    return binary
|
||||
|
||||
|
||||
def is_likely_primary_text_segment(cnt):
    """Returns True for contours with an area between 400 and 16000 (exclusive) or a bounding box more than 3x wider than tall."""
    _, _, width, height = cv2.boundingRect(cnt)
    has_text_like_area = 400 < cv2.contourArea(cnt) < 16000
    return has_text_like_area or width / height > 3
|
||||
|
||||
|
||||
def draw_bboxes(image, bboxes):
    """Fills every ``(x, y, w, h)`` box with black directly on ``image`` and returns it."""
    black = (0, 0, 0)
    for x, y, w, h in bboxes:
        top_left, bottom_right = (x, y), (x + w, y + h)
        # Thickness -1 draws a filled rectangle.
        cv2.rectangle(image, top_left, bottom_right, black, -1)
    return image
|
||||
0
cv_analysis/server/__init__.py
Normal file
0
cv_analysis/server/__init__.py
Normal file
@ -4,10 +4,6 @@ from typing import Callable
|
||||
from funcy import lmap
|
||||
from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic
|
||||
|
||||
from cv_analysis.figure_detection import detect_figures
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
from cv_analysis.redaction_detection import find_redactions
|
||||
from cv_analysis.table_parsing import parse_tables
|
||||
from cv_analysis.utils.preprocessing import open_img_from_bytes
|
||||
|
||||
|
||||
@ -38,12 +34,3 @@ def make_streamable_analysis_fn(analysis_fn: Callable):
|
||||
return b"", results_metadata
|
||||
|
||||
return make_streamable_and_wrap_in_packing_logic(analyse, batched=False)
|
||||
|
||||
|
||||
def get_analysis_fn(operation):
|
||||
return {
|
||||
"table_parsing": parse_tables,
|
||||
"layout_parsing": parse_layout,
|
||||
"figure_detection": detect_figures,
|
||||
"redaction_detection": find_redactions,
|
||||
}[operation]
|
||||
@ -5,6 +5,10 @@ def is_large_enough(cont, min_area):
|
||||
return cv2.contourArea(cont, False) > min_area
|
||||
|
||||
|
||||
def is_not_too_large(cnt, max_area):
    """Returns True if the (non-oriented) contour area is strictly below ``max_area``."""
    area = cv2.contourArea(cnt, False)
    return area < max_area
|
||||
|
||||
|
||||
def has_acceptable_format(cont, max_width_to_height_ratio):
    """Returns True if the bounding box is neither too wide nor too tall.

    The width/height ratio must lie between ``1 / max_width_to_height_ratio``
    and ``max_width_to_height_ratio`` (both inclusive).
    """
    _, _, width, height = cv2.boundingRect(cont)
    aspect = width / height
    if aspect > max_width_to_height_ratio:
        return False
    return aspect >= 1 / max_width_to_height_ratio
|
||||
|
||||
@ -1,64 +0,0 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
|
||||
|
||||
def remove_primary_text_regions(image):
|
||||
"""Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs.
|
||||
|
||||
Args:
|
||||
image: Image to remove primary text from.
|
||||
|
||||
Returns:
|
||||
Image with primary text removed.
|
||||
"""
|
||||
|
||||
image = image.copy()
|
||||
|
||||
cnts = find_primary_text_regions(image)
|
||||
for cnt in cnts:
|
||||
x, y, w, h = cv2.boundingRect(cnt)
|
||||
cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1)
|
||||
# show_mpl(image)
|
||||
return image
|
||||
|
||||
|
||||
def find_primary_text_regions(image):
|
||||
"""Finds regions of primary text, meaning no figure descriptions for example, but main text body paragraphs.
|
||||
|
||||
Args:
|
||||
image: Image to remove primary text from.
|
||||
|
||||
Returns:
|
||||
Image with primary text removed.
|
||||
|
||||
References:
|
||||
https://stackoverflow.com/questions/58349726/opencv-how-to-remove-text-from-background
|
||||
"""
|
||||
|
||||
def is_likely_primary_text_segments(cnt):
|
||||
x, y, w, h = cv2.boundingRect(cnt)
|
||||
return 800 < cv2.contourArea(cnt) < 16000 or w / h > 3
|
||||
|
||||
image = image.copy()
|
||||
|
||||
if len(image.shape) > 2:
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
image = cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
|
||||
|
||||
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 7)) # 20,3
|
||||
close = cv2.morphologyEx(image, cv2.MORPH_CLOSE, close_kernel, iterations=1)
|
||||
|
||||
# show_mpl(close)
|
||||
|
||||
dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 3)) # 5,3
|
||||
dilate = cv2.dilate(close, dilate_kernel, iterations=1)
|
||||
|
||||
# show_mpl(dilate)
|
||||
|
||||
cnts, _ = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
|
||||
cnts = filter(is_likely_primary_text_segments, cnts)
|
||||
|
||||
return cnts
|
||||
@ -15,4 +15,5 @@ scipy~=1.8.0
|
||||
coverage~=5.5
|
||||
dependency-check~=0.6.0
|
||||
prometheus-client~=0.13.1
|
||||
prometheus_flask_exporter~=0.19.0
|
||||
prometheus_flask_exporter~=0.19.0
|
||||
lorem-text==2.1
|
||||
@ -45,5 +45,6 @@ if __name__ == "__main__":
|
||||
elif args.type == "layout":
|
||||
from cv_analysis.layout_parsing import parse_layout as analyze
|
||||
elif args.type == "figure":
|
||||
from cv_analysis.figure_detection import detect_figures as analyze
|
||||
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
|
||||
analyze = make_figure_detection_pipeline()
|
||||
annotate_page(page, analyze, draw, name=name, show=args.show)
|
||||
|
||||
38
scripts/annotate_figures.py
Normal file
38
scripts/annotate_figures.py
Normal file
@ -0,0 +1,38 @@
|
||||
import argparse
|
||||
from itertools import starmap
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pdf2image
|
||||
from PIL import Image
|
||||
from funcy import lmap
|
||||
|
||||
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line of the figure annotation script.

    Returns:
        Namespace with ``pdf_path`` and ``output_folder``.
    """
    parser = argparse.ArgumentParser(description="Annotate detected figures on every page of a PDF.")
    parser.add_argument("--pdf_path", "-p", required=True, help="path of the PDF to analyse")
    parser.add_argument("--output_folder", "-o", required=True, help="folder the annotated PDF is written to")
    return parser.parse_args()
|
||||
|
||||
|
||||
def annotate_figures(images):
    """Lazily yields each image with its detected figure rectangles drawn on it."""
    detect = make_figure_detection_pipeline()
    detections = map(detect, images)
    # Pair every page with its own detections; evaluation stays lazy.
    return map(draw_rectangles, images, detections)
|
||||
|
||||
|
||||
def save_as_pdf(images, output_folder, file_name):
    """Write the images as pages of ``<output_folder>/<file_name>_annotated_figures.pdf``.

    Args:
        images: Iterable of image arrays, one per page.
        output_folder: Target directory; created if missing.
        file_name: Stem of the output file.

    Raises:
        ValueError: If ``images`` is empty.
    """
    output_dir = Path(output_folder)
    output_dir.mkdir(parents=True, exist_ok=True)

    pages = lmap(Image.fromarray, images)
    if not pages:
        raise ValueError("no pages to save")

    # ``append_images`` holds the pages *after* the one being saved; passing
    # the full list would write the first page twice.
    first_page, *remaining_pages = pages
    first_page.save(
        output_dir / f"{file_name}_annotated_figures.pdf",
        save_all=True,
        append_images=remaining_pages,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Render every PDF page to an array, annotate the figures, and save the result as a PDF.
    args = parse_args()
    rendered_pages = pdf2image.convert_from_path(args.pdf_path)
    pages = lmap(np.array, rendered_pages)
    annotated_pages = annotate_figures(images=pages)
    output_stem = Path(args.pdf_path).stem
    save_as_pdf(annotated_pages, args.output_folder, output_stem)
|
||||
10
src/serve.py
10
src/serve.py
@ -3,7 +3,9 @@ import logging
|
||||
from waitress import serve
|
||||
|
||||
from cv_analysis.config import CONFIG
|
||||
from cv_analysis.pyinfra_compat import make_streamable_analysis_fn, get_analysis_fn
|
||||
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
|
||||
from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn
|
||||
from cv_analysis.table_parsing import parse_tables
|
||||
from cv_analysis.utils.banner import make_art
|
||||
from cv_analysis.utils.logging import get_logger
|
||||
from incl.pyinfra.pyinfra.server.server import set_up_processing_server
|
||||
@ -12,10 +14,10 @@ from incl.pyinfra.pyinfra.server.server import set_up_processing_server
|
||||
def main():
|
||||
logger.info(make_art())
|
||||
|
||||
operations = ["table_parsing"]
|
||||
operation2function = {op: make_streamable_analysis_fn(get_analysis_fn(op)) for op in operations}
|
||||
operation2function = {"table_parsing": parse_tables, "figure_detection": make_figure_detection_pipeline()}
|
||||
operation2streamable_function = {op: make_streamable_analysis_fn(fn) for op, fn in operation2function.items()}
|
||||
|
||||
server = set_up_processing_server(operation2function)
|
||||
server = set_up_processing_server(operation2streamable_function)
|
||||
|
||||
host = CONFIG.webserver.host
|
||||
port = CONFIG.webserver.port
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
pytest_plugins = [
|
||||
"test.fixtures.pyinfra_compat",
|
||||
"test.fixtures.server",
|
||||
"test.fixtures.figure_detection",
|
||||
]
|
||||
|
||||
|
||||
def pytest_make_parametrize_id(config, val, argname):
    """Pytest hook: label each parametrized value as `` <argname>=<val> `` in test ids."""
    return f" {argname}={val} "
|
||||
|
||||
93
test/fixtures/figure_detection.py
vendored
Normal file
93
test/fixtures/figure_detection.py
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
import textwrap
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pytest
|
||||
from PIL import Image
|
||||
from lorem_text import lorem
|
||||
from funcy import first
|
||||
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
|
||||
|
||||
@pytest.fixture
def page_with_images(random_image, n_images, background):
    """Page array with ``random_image`` pasted once, or twice when ``n_images == 2``."""
    positions = [(200, 200)]
    if n_images == 2:
        positions.append((1000, 2600))

    page = Image.fromarray(background.astype("uint8")).convert("RGB")
    for position in positions:
        page = paste_image(page, random_image, position)
    return np.array(page)
|
||||
|
||||
|
||||
@pytest.fixture
def page_with_text(background, font_scale, font_style, text_types):
    """Page array with lorem-ipsum text drawn for each requested text type.

    ``text_types`` may contain "header", "body" and/or "caption".
    """
    # Draw on a copy: the text is written in place, and the ``background``
    # fixture value is shared with every other fixture used by the same test.
    image = background.copy()
    body_height = image.shape[0] // 3
    page_centre_x = image.shape[1] // 2

    if "header" in text_types:
        image = paste_text(image, (50, 50), font_scale, font_style, y_stop=70)
    if "body" in text_types:
        image = paste_text(image, (page_centre_x, 70), font_scale, font_style, y_stop=body_height)
        image = paste_text(image, (50, body_height + 70), font_scale, font_style, y_stop=body_height * 2)
    if "caption" in text_types:
        caption_cursor = (page_centre_x, image.shape[0] - 100)
        image = paste_text(image, caption_cursor, font_scale, font_style, y_stop=body_height * 3)
    return image
|
||||
|
||||
|
||||
@pytest.fixture
def page_with_images_and_text(page_with_images, page_with_text):
    """Overlay of the text page and the image page (element-wise minimum of the two arrays)."""
    combined = np.fmin(page_with_text, page_with_images)
    return combined
|
||||
|
||||
|
||||
@pytest.fixture
def background(background_color):
    """Uniform 3508x2480 three-channel page filled with ``background_color``."""
    page_shape = (3508, 2480, 3)
    blank = np.ones(page_shape, dtype="uint8")
    return blank * background_color
|
||||
|
||||
|
||||
@pytest.fixture
def random_image(image_size):
    """Float array of shape ``(*image_size, 3)`` with uniform random values in [0, 255)."""
    noise = np.random.rand(*image_size, 3)
    return noise * 255
|
||||
|
||||
|
||||
@pytest.fixture
def figure_detection_pipeline():
    """Figure detection pipeline built with its default parameters."""
    pipeline = make_figure_detection_pipeline()
    return pipeline
|
||||
|
||||
|
||||
def paste_text(image: np.ndarray, cursor, font_scale, font_style, y_stop):
    """Draw wrapped lorem-ipsum text onto ``image`` in place and return it.

    Args:
        image: Page array to draw on (modified in place).
        cursor: ``(x, y)`` origin of the first text line.
        font_scale: OpenCV font scale.
        font_style: OpenCV font face.
        y_stop: Lines whose origin would lie below this y coordinate are not drawn.

    Returns:
        The same ``image`` object.
    """
    x_start, y_start = cursor
    page_width = image.shape[1]

    text = lorem.paragraphs(1) * 200
    (dx, dy), base = cv2.getTextSize(text, fontFace=font_style, fontScale=font_scale, thickness=1)
    line_height = dy + base

    # Wrap by characters per line: the width available right of the cursor
    # divided by the average glyph width. The previous ``dx // page_width``
    # was a line count, not a character count, so lines overran the right
    # page edge.
    char_width = (dx / len(text) if text else 0) or 1
    chars_per_line = max(int((page_width - x_start) / char_width), 1)
    wrapped = textwrap.fill(text=text, width=chars_per_line)

    for i, line in enumerate(wrapped.split("\n")):
        y = y_start + i * line_height
        if y > y_stop:
            break
        cv2.putText(
            image,
            line,
            org=(x_start, y),
            fontFace=font_style,
            fontScale=font_scale,
            color=(0, 0, 0),
            thickness=1,
            lineType=cv2.LINE_AA,
        )

    return image
|
||||
|
||||
|
||||
def paste_image(page_image, image, coords):
    """Paste the array ``image`` onto the PIL ``page_image`` at ``coords`` and return the page."""
    patch = Image.fromarray(image.astype("uint8"))
    page_image.paste(patch.convert("RGBA"), coords)
    return page_image
|
||||
|
||||
@ -6,17 +6,11 @@ import pytest
|
||||
from PIL import Image
|
||||
from funcy import first
|
||||
|
||||
from cv_analysis.pyinfra_compat import get_analysis_fn
|
||||
from cv_analysis.utils.preprocessing import open_img_from_bytes
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
from incl.pyinfra.pyinfra.server.packing import bytes_to_string, string_to_bytes
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def random_image():
|
||||
return np.random.rand(100, 100, 3) * 255
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def random_image_as_bytes_and_compressed(random_image):
|
||||
image = Image.fromarray(random_image.astype("uint8")).convert("RGBA")
|
||||
@ -51,11 +45,8 @@ def expected_analyse_metadata(operation, random_image_metadata_package):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def analyse_fn(operation):
|
||||
if operation == "mock":
|
||||
def analysis_fn_mock(operation):
|
||||
def analyse_mock(image: np.ndarray):
|
||||
return [Rectangle.from_xywh((0, 0, image.shape[1], image.shape[0]))]
|
||||
|
||||
def analyse_mock(image: np.ndarray):
|
||||
return [Rectangle.from_xywh((0, 0, image.shape[1], image.shape[0]))]
|
||||
|
||||
return analyse_mock
|
||||
return get_analysis_fn(operation)
|
||||
return analyse_mock
|
||||
0
test/unit_tests/__init__.py
Normal file
0
test/unit_tests/__init__.py
Normal file
0
test/unit_tests/figure_detection/__init__.py
Normal file
0
test/unit_tests/figure_detection/__init__.py
Normal file
@ -0,0 +1,55 @@
|
||||
from math import prod
|
||||
|
||||
import cv2
|
||||
import pytest
|
||||
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from test.utils.utils import powerset
|
||||
|
||||
|
||||
@pytest.mark.parametrize("background_color", [255, 220])
class TestFigureDetectionPipeline:
    """End-to-end checks of the figure detection pipeline on synthetic pages."""

    def test_blank_page_yields_no_figures(self, figure_detection_pipeline, background):
        results = figure_detection_pipeline(background)

        assert not list(results)

    @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
    @pytest.mark.parametrize("n_images", [1, 2])
    def test_page_without_text_yields_figures(self, figure_detection_pipeline, page_with_images, image_size):
        results = figure_detection_pipeline(page_with_images)
        result_figures_size = map(lambda x: (x.w, x.h), results)

        # NOTE(review): this passes vacuously when nothing is detected;
        # consider also asserting that the result is non-empty.
        assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figures_size])

    @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
    @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
    @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
    @pytest.mark.parametrize("error_tolerance", [0.025])
    def test_page_with_only_text_yields_no_figures(self, figure_detection_pipeline, page_with_text, error_tolerance):
        results = figure_detection_pipeline(page_with_text)

        result_figures_area = sum(map(lambda x: (x.w * x.h), results))
        # NOTE(review): the product of the full shape includes the channel
        # dimension, so this is larger than the pixel area of the page --
        # confirm the tolerance was chosen with that in mind.
        page_area = prod(page_with_text.shape)
        error = result_figures_area / page_area

        assert error <= error_tolerance

    @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
    @pytest.mark.parametrize("n_images", [1, 2])
    @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
    @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
    @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
    @pytest.mark.parametrize("error_tolerance", [0.9])
    def test_page_with_images_and_text_yields_only_figures(
        self, figure_detection_pipeline, page_with_images_and_text, image_size, n_images, error_tolerance
    ):
        results = list(figure_detection_pipeline(page_with_images_and_text))

        result_figures_area = sum(map(lambda x: (x.w * x.h), results))
        expected_figure_area = n_images * prod(image_size)

        error = abs(result_figures_area - expected_figure_area) / expected_figure_area

        assert error <= error_tolerance
|
||||
40
test/unit_tests/figure_detection/text_test.py
Normal file
40
test/unit_tests/figure_detection/text_test.py
Normal file
@ -0,0 +1,40 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from cv_analysis.figure_detection.text import remove_primary_text_regions, apply_threshold_to_image
|
||||
from cv_analysis.utils.display import show_mpl
|
||||
from test.utils.utils import powerset
|
||||
|
||||
|
||||
@pytest.mark.parametrize("error_tolerance", [0.07])
@pytest.mark.parametrize("background_color", [255, 220])
class TestFindPrimaryTextRegions:
    """Compares remove_primary_text_regions output with the plain thresholded page."""

    def test_blank_page_stays_blank(self, background, error_tolerance):
        cleaned = remove_primary_text_regions(background)

        np.testing.assert_equal(cleaned, apply_threshold_to_image(background))

    @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
    @pytest.mark.parametrize("n_images", [1, 2])
    def test_page_without_text_keeps_images(self, page_with_images, error_tolerance):
        cleaned = remove_primary_text_regions(page_with_images)

        np.testing.assert_equal(cleaned, apply_threshold_to_image(page_with_images))

    @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
    @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
    @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
    def test_page_with_only_text_gets_text_removed(self, page_with_text, error_tolerance):
        cleaned = remove_primary_text_regions(page_with_text)
        # Fraction of pixels that differ from the thresholded input page.
        changed_pixels = np.sum(cleaned != apply_threshold_to_image(page_with_text))
        relative_error = changed_pixels / cleaned.size

        assert relative_error <= error_tolerance

    @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
    @pytest.mark.parametrize("n_images", [1, 2])
    @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
    @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
    @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
    def test_page_with_images_and_text_keeps_images(self, page_with_images_and_text, error_tolerance):
        cleaned = remove_primary_text_regions(page_with_images_and_text)
        # Fraction of pixels that differ from the thresholded input page.
        changed_pixels = np.sum(cleaned != apply_threshold_to_image(page_with_images_and_text))
        relative_error = changed_pixels / cleaned.size

        assert relative_error <= error_tolerance
|
||||
@ -1,31 +0,0 @@
|
||||
import pytest
|
||||
from funcy import first
|
||||
|
||||
from cv_analysis.figure_detection import detect_figures
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
from cv_analysis.pyinfra_compat import get_analysis_fn, make_streamable_analysis_fn
|
||||
from cv_analysis.redaction_detection import find_redactions
|
||||
from cv_analysis.table_parsing import parse_tables
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"analysis_fn_name,analysis_fn",
|
||||
[
|
||||
("table_parsing", parse_tables),
|
||||
("layout_parsing", parse_layout),
|
||||
("figure_detection", detect_figures),
|
||||
("redaction_detection", find_redactions),
|
||||
],
|
||||
)
|
||||
def test_get_analysis_fn(analysis_fn_name, analysis_fn):
|
||||
fn = get_analysis_fn
|
||||
assert fn(analysis_fn_name) == analysis_fn
|
||||
|
||||
|
||||
@pytest.mark.parametrize("operation", ["mock", "table_parsing"])
|
||||
def test_make_analysis_fn(analyse_fn, random_image_metadata_package, expected_analyse_metadata):
|
||||
analyse = make_streamable_analysis_fn(analyse_fn)
|
||||
results = first(analyse(random_image_metadata_package))
|
||||
|
||||
assert results["metadata"] == expected_analyse_metadata
|
||||
|
||||
0
test/unit_tests/server/__init__.py
Normal file
0
test/unit_tests/server/__init__.py
Normal file
13
test/unit_tests/server/pyinfra_compat_test.py
Normal file
13
test/unit_tests/server/pyinfra_compat_test.py
Normal file
@ -0,0 +1,13 @@
|
||||
import pytest
|
||||
from funcy import first
|
||||
|
||||
from cv_analysis.server.pyinfra_compat import make_streamable_analysis_fn
|
||||
|
||||
|
||||
@pytest.mark.parametrize("operation", ["mock"])
@pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
def test_make_analysis_fn(analysis_fn_mock, random_image_metadata_package, expected_analyse_metadata):
    """The streamable wrapper yields the expected metadata for the mock analysis function."""
    streamable_analyse = make_streamable_analysis_fn(analysis_fn_mock)
    first_result = first(streamable_analyse(random_image_metadata_package))

    assert first_result["metadata"] == expected_analyse_metadata
|
||||
0
test/utils/__init__.py
Normal file
0
test/utils/__init__.py
Normal file
6
test/utils/utils.py
Normal file
6
test/utils/utils.py
Normal file
@ -0,0 +1,6 @@
|
||||
from itertools import chain, combinations
|
||||
|
||||
|
||||
def powerset(iterable):
    """Return all non-empty subsets of ``iterable`` as a list of tuples, smallest first.

    The empty subset is deliberately left out (sizes start at 1), so this is
    the power set minus the empty set.

    >>> powerset([1, 2])
    [(1,), (2,), (1, 2)]
    """
    items = list(iterable)
    subsets_by_size = (combinations(items, size) for size in range(1, len(items) + 1))
    return list(chain.from_iterable(subsets_by_size))
|
||||
Loading…
x
Reference in New Issue
Block a user