Merge in RR/cv-analysis from pdf2image to master
Squashed commit of the following:
commit 1353f54d2dceb0a79b1f81bfa2c035f5a454275a
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Aug 10 09:07:31 2022 +0200
add deRotation and transformation via rectanglePlus
commit 51459dbf57a86e3eac66ec0da02de40dc1b68796
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Tue Aug 9 08:53:50 2022 +0200
add derotation and transformation to PDF coordinates to cv-analysis output
commit 733991e2f5a4664205b2f7cc756cebcbc9ee3930
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Mon Aug 8 15:15:13 2022 +0200
update pipeline with derotation logic WIP
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
from functools import partial
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
from cv_analysis.figure_detection.figures import detect_large_coherent_structures
|
|
from cv_analysis.figure_detection.text import remove_primary_text_regions
|
|
from cv_analysis.utils.filters import (
|
|
is_large_enough,
|
|
has_acceptable_format,
|
|
is_not_too_large,
|
|
)
|
|
from cv_analysis.utils.postprocessing import remove_included
|
|
from cv_analysis.utils.structures import Rectangle
|
|
|
|
|
|
def detect_figures(
    image: np.ndarray,
    *,
    min_area: float = 5000,
    max_area_fraction: float = 0.99,
    max_width_to_height_ratio: float = 6,
):
    """Detect figure candidates in an image and return their bounding rectangles.

    Pipeline:
      1. ``remove_primary_text_regions`` pre-processes the image
         (presumably masking out body text -- confirm against that helper).
      2. ``detect_large_coherent_structures`` yields candidate contours.
      3. Candidates are filtered with ``is_likely_figure``.
      4. Each surviving contour is reduced to its ``cv2.boundingRect`` and
         wrapped via ``Rectangle.from_xywh``.
      5. ``remove_included`` post-processes the rectangles (presumably
         dropping rectangles contained in others -- confirm against that helper).

    Args:
        image: Input image; only ``image.shape[0]`` and ``image.shape[1]``
            are read here to derive the maximum allowed area.
        min_area: Lower area threshold passed to ``is_large_enough``.
        max_area_fraction: Fraction of ``shape[0] * shape[1]`` used as the
            upper area threshold passed to ``is_not_too_large``.
        max_width_to_height_ratio: Threshold passed to ``has_acceptable_format``.

    Returns:
        Whatever ``remove_included`` returns for the candidate rectangles.
    """
    # Upper bound is relative to the image so that a contour spanning
    # (almost) the whole image is not reported as a figure.
    max_area = image.shape[0] * image.shape[1] * max_area_fraction
    figure_filter = partial(is_likely_figure, min_area, max_area, max_width_to_height_ratio)

    image = remove_primary_text_regions(image)
    cnts = detect_large_coherent_structures(image)
    cnts = filter(figure_filter, cnts)

    # NOTE: filter/map are lazy; remove_included receives a one-shot iterator.
    rects = map(cv2.boundingRect, cnts)
    rects = map(Rectangle.from_xywh, rects)
    rects = remove_included(rects)

    return rects
|
|
|
|
|
|
def is_likely_figure(min_area, max_area, max_width_to_height_ratio, cnts):
    """Decide whether a candidate passes all three figure filters.

    The checks run in order -- ``is_not_too_large``, ``is_large_enough``,
    ``has_acceptable_format`` -- and evaluation stops at the first falsy
    result, which is then returned as-is. If every check passes, the result
    of the last check is returned.

    The thresholds come first in the signature so the function can be
    pre-bound with ``functools.partial`` and used as a one-argument predicate.
    """
    verdict = is_not_too_large(cnts, max_area)
    if verdict:
        verdict = is_large_enough(cnts, min_area)
    if verdict:
        verdict = has_acceptable_format(cnts, max_width_to_height_ratio)
    return verdict
|