cv-analysis-service/cv_analysis/figure_detection/figure_detection_pipeline.py
Isaac Riley beb40da3b1 Pull request #22: add single-cell filtering to table parsing and increase tolerance parameter to 7; refactor postprocessing to use the Rectangles data structure
Merge in RR/cv-analysis from remove_isolated to master

Squashed commit of the following:

commit 2613ed1615d1b69b3e4f2acea197993a91d00561
Author: Isaac Riley <Isaac.Riley@iqser.com>
Date:   Tue Aug 2 10:17:33 2022 +0200

    add single-cell filtering to table parsing and increase tolerance parameter to 7; refactored postprocessing to use the Rectangles data structure
2022-08-02 10:54:13 +02:00

44 lines
1.3 KiB
Python

from functools import partial
import cv2
import numpy as np
from cv_analysis.figure_detection.figures import detect_large_coherent_structures
from cv_analysis.figure_detection.text import remove_primary_text_regions
from cv_analysis.utils.filters import (
is_large_enough,
has_acceptable_format,
is_not_too_large,
)
from cv_analysis.utils.postprocessing import remove_included
from cv_analysis.utils.structures import Rectangle
def make_figure_detection_pipeline(min_area=5000, max_width_to_height_ratio=6):
    """Build a figure-detection function configured with the given limits.

    Args:
        min_area: Minimum-area bound handed to the contour filter.
        max_width_to_height_ratio: Width-to-height bound handed to the
            contour filter.

    Returns:
        A callable that takes an image array and returns whatever
        ``remove_included`` produces for the bounding rectangles of the
        contours that pass the filter.
    """

    def pipeline(image: np.ndarray):
        """Detect candidate figure rectangles in a single image.

        Args:
            image: Array whose first two ``shape`` entries are used as the
                image dimensions.

        Returns:
            The result of ``remove_included`` applied to the ``Rectangle``
            objects built from the filtered contours.
        """
        # The area cap is 99% of shape[0] * shape[1], taken from the input
        # image before it is rebound to the text-stripped version below.
        # NOTE(review): presumably this rejects page-sized contours - confirm.
        max_area = image.shape[0] * image.shape[1] * 0.99
        filter_cnts = make_filter_likely_figures(min_area, max_area, max_width_to_height_ratio)

        image = remove_primary_text_regions(image)
        cnts = detect_large_coherent_structures(image)
        cnts = filter_cnts(cnts)

        # Both map() calls are lazy: remove_included receives an iterator,
        # and the contours are only filtered/converted as it is consumed.
        rects = map(cv2.boundingRect, cnts)
        rects = map(Rectangle.from_xywh, rects)
        rects = remove_included(rects)

        return rects

    return pipeline
def make_filter_likely_figures(min_area, max_area, max_width_to_height_ratio):
    """Build a lazy filter that keeps only contours passing all three checks.

    Args:
        min_area: Bound passed to ``is_large_enough``.
        max_area: Bound passed to ``is_not_too_large``.
        max_width_to_height_ratio: Bound passed to ``has_acceptable_format``.

    Returns:
        A ``functools.partial`` wrapping the built-in ``filter``; calling it
        with an iterable of contours yields a lazy ``filter`` iterator over
        the elements for which every check is truthy.
    """

    def is_likely_figure(contour):
        """Return a truthy value when a single contour passes every check."""
        # The checks short-circuit in this order; later ones are skipped as
        # soon as an earlier one fails.
        return (
            is_not_too_large(contour, max_area)
            and is_large_enough(contour, min_area)
            and has_acceptable_format(contour, max_width_to_height_ratio)
        )

    return partial(filter, is_likely_figure)