Compare commits: master...refactorin (164 commits)
@@ -1,17 +1,17 @@
 from functools import partial
 
-import cv2
 import numpy as np
+from funcy import lmap
 
 from cv_analysis.figure_detection.figures import detect_large_coherent_structures
 from cv_analysis.figure_detection.text import remove_primary_text_regions
+from cv_analysis.utils.conversion import contour_to_rectangle
 from cv_analysis.utils.filters import (
     is_large_enough,
     has_acceptable_format,
-    is_not_too_large,
+    is_small_enough,
 )
 from cv_analysis.utils.postprocessing import remove_included
-from cv_analysis.utils.structures import Rectangle
 
 
 def detect_figures(image: np.array):
@@ -21,19 +21,18 @@ def detect_figures(image: np.array):
     figure_filter = partial(is_likely_figure, min_area, max_area, max_width_to_height_ratio)
 
     image = remove_primary_text_regions(image)
-    cnts = detect_large_coherent_structures(image)
-    cnts = filter(figure_filter, cnts)
+    contours = detect_large_coherent_structures(image)
+    contours = filter(figure_filter, contours)
 
-    rects = map(cv2.boundingRect, cnts)
-    rects = map(Rectangle.from_xywh, rects)
-    rects = remove_included(rects)
+    rectangles = lmap(contour_to_rectangle, contours)
+    rectangles = remove_included(rectangles)
 
-    return rects
+    return rectangles
 
 
-def is_likely_figure(min_area, max_area, max_width_to_height_ratio, cnts):
+def is_likely_figure(min_area, max_area, max_width_to_height_ratio, contours):
     return (
-        is_not_too_large(cnts, max_area)
-        and is_large_enough(cnts, min_area)
-        and has_acceptable_format(cnts, max_width_to_height_ratio)
+        is_small_enough(contours, max_area)
+        and is_large_enough(contours, min_area)
+        and has_acceptable_format(contours, max_width_to_height_ratio)
     )
@@ -1,25 +1,33 @@
 import cv2
 import numpy as np
 
+from cv_analysis.utils.common import find_contours_and_hierarchies
+
 
 def detect_large_coherent_structures(image: np.array):
-    """Detects large coherent structures on an image.
+    """Detects large coherent structures in an image.
     Expects an image with binary color space (e.g. threshold applied).
 
+    Args:
+        image (np.array): Image to look for large coherent structures in.
+
     Returns:
-        contours
+        list: List of contours.
 
     References:
         https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
     """
     assert len(image.shape) == 2
 
+    # FIXME: Parameterize via factory
     dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5))
+    # FIXME: Parameterize via factory
     dilate = cv2.dilate(image, dilate_kernel, iterations=4)
+    # FIXME: Parameterize via factory
     close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
-    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)
+    # FIXME: Parameterize via factory
+    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)  # TODO: Tweak iterations
 
-    cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours, _ = find_contours_and_hierarchies(close)
 
-    return cnts
+    return contours
@@ -1,5 +1,7 @@
 import cv2
 
+from cv_analysis.utils.common import normalize_to_gray_scale
+
 
 def remove_primary_text_regions(image):
     """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs.
@@ -35,6 +37,7 @@ def remove_primary_text_regions(image):
 
 def apply_threshold_to_image(image):
     """Converts an image to black and white."""
+    image = normalize_to_gray_scale(image)
     image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
     return cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
 
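The binarization step above relies on `cv2.threshold` with `THRESH_BINARY_INV + THRESH_OTSU`, where the fixed threshold value is effectively replaced by the Otsu estimate. This is not the repository's code, just a minimal sketch of that cv2 call on a synthetic grayscale page, assuming only `cv2` and `numpy`:

```python
import cv2
import numpy as np

# Synthetic page: white background (255) with a dark block standing in for text.
page = np.full((100, 100), 255, dtype=np.uint8)
page[40:60, 10:90] = 30

# cv2.threshold returns (threshold_value, binary_image); the diff keeps index [1].
_, binary = cv2.threshold(page, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
print(binary.max(), binary.min())  # 255 0: dark content becomes foreground (255)
```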
@@ -1,87 +1,80 @@
-import itertools
-from itertools import compress
-from itertools import starmap
-from operator import __and__
+from functools import partial
+from typing import Iterable, List
 
 import cv2
 import numpy as np
+from funcy import compose, rcompose, lkeep
 
+from cv_analysis.utils import lstarkeep
-from cv_analysis.utils.connect_rects import connect_related_rects2
-from cv_analysis.utils.structures import Rectangle
-from cv_analysis.utils.postprocessing import (
-    remove_overlapping,
-    remove_included,
-    has_no_parent,
+from cv_analysis.utils.common import (
+    find_contours_and_hierarchies,
+    dilate_page_components,
+    normalize_to_gray_scale,
+    threshold_image,
+    invert_image,
+    fill_rectangles,
 )
-from cv_analysis.utils.visual_logging import vizlogger
-
-#could be dynamic parameter is the scan is noisy
-def is_likely_segment(rect, min_area=100):
-    return cv2.contourArea(rect, False) > min_area
+from cv_analysis.utils.conversion import contour_to_rectangle
+from cv_analysis.utils.merging import merge_related_rectangles
+from cv_analysis.utils.postprocessing import remove_included, has_no_parent
+from cv_analysis.utils.rectangle import Rectangle
 
 
-def find_segments(image):
-    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    mask1 = map(is_likely_segment, contours)
-    mask2 = map(has_no_parent, hierarchies[0])
-    mask = starmap(__and__, zip(mask1, mask2))
-    contours = compress(contours, mask)
+def parse_layout(image: np.array) -> List[Rectangle]:
+    """Parse the layout of a page.
 
-    rectangles = (cv2.boundingRect(c) for c in contours)
+    Args:
+        image: Image of the page.
 
+    Returns:
+        List of rectangles representing the layout of the page as identified page elements.
+    """
+    rectangles = rcompose(
+        find_segments,
+        remove_included,
+        merge_related_rectangles,
+        remove_included,
+    )(image)
 
     return rectangles
 
 
-def dilate_page_components(image):
-    #if text is detected in words make kernel bigger
-    image = cv2.GaussianBlur(image, (7, 7), 0)
-    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    return cv2.dilate(thresh, kernel, iterations=4)
+def find_segments(image: np.ndarray) -> List[Rectangle]:
+    """Find segments in a page. Segments are structural elements of a page, such as text blocks, tables, etc."""
+    rectangles = rcompose(
+        prepare_for_initial_detection,
+        __find_segments,
+        partial(prepare_for_meta_detection, image.copy()),
+        __find_segments,
+    )(image)
 
+    return rectangles
 
 
-def fill_in_component_area(image, rect):
-    x, y, w, h = rect
-    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
-    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
-    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
-    return ~image
+def prepare_for_initial_detection(image: np.ndarray) -> np.ndarray:
+    return compose(dilate_page_components, normalize_to_gray_scale)(image)
 
 
+def __find_segments(image: np.ndarray) -> List[Rectangle]:
+    def to_rectangle_if_valid(contour, hierarchy):
+        return contour_to_rectangle(contour) if is_likely_segment(contour) and has_no_parent(hierarchy) else None
 
-def parse_layout(image: np.array):
-    image = image.copy()
-    image_ = image.copy()
+    rectangles = lstarkeep(to_rectangle_if_valid, zip(*find_contours_and_hierarchies(image)))
 
-    if len(image_.shape) > 2:
-        image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)
+    return rectangles
 
-    dilate = dilate_page_components(image_)
-    # show_mpl(dilate)
 
-    rects = list(find_segments(dilate))
+def prepare_for_meta_detection(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
+    image = rcompose(
+        fill_rectangles,
+        threshold_image,
+        invert_image,
+        normalize_to_gray_scale,
+    )(image, rectangles)
 
-    # -> Run meta detection on the previous detections TODO: refactor
-    for rect in rects:
-        x, y, w, h = rect
-        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
-        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
-    # show_mpl(image)
-    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
-    image = ~image
-    # show_mpl(image)
-    if len(image.shape) > 2:
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    return image
 
-    rects = find_segments(image)
-    # <- End of meta detection
-    rects = list(map(Rectangle.from_xywh, rects))
-    rects = remove_included(rects)
 
-    rects = map(lambda r: r.xywh(), rects)
-    rects = connect_related_rects2(rects)
-    rects = list(map(Rectangle.from_xywh, rects))
-    rects = remove_included(rects)
+def is_likely_segment(rectangle: Rectangle, min_area: float = 100) -> bool:
+    # FIXME: Parameterize via factory
+    return cv2.contourArea(rectangle, False) > min_area
 
-    return rects
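The new `parse_layout` and `find_segments` are built with `funcy.rcompose`, which chains functions left to right (the first-listed function runs first). A minimal sketch of that behaviour, not taken from the repository:

```python
from funcy import rcompose

# rcompose(f, g)(x) == g(f(x)), so parse_layout above is equivalent to
# remove_included(merge_related_rectangles(remove_included(find_segments(image)))).
pipeline = rcompose(lambda x: x + 1, lambda x: x * 2)
assert pipeline(3) == 8  # (3 + 1) * 2
```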
@@ -5,5 +5,8 @@ from pathlib import Path
 MODULE_PATH = Path(__file__).resolve().parents[0]
 PACKAGE_ROOT_PATH = MODULE_PATH.parents[0]
 REPO_ROOT_PATH = PACKAGE_ROOT_PATH
 
 TEST_DIR_PATH = REPO_ROOT_PATH / "test"
-TEST_DATA_DVC = TEST_DIR_PATH / "test_data.dvc"
+TEST_DATA_DVC = TEST_DIR_PATH / "test_data.dvc"  # TODO: remove once new tests are in place
+TEST_DATA_DIR = TEST_DIR_PATH / "data"
+TEST_PAGE_TEXTURES_DIR = TEST_DATA_DIR / "paper"
@@ -5,7 +5,7 @@ import numpy as np
 from iteration_utilities import starfilter, first
 
 from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.visual_logger import vizlogger
 
 
 def is_likely_redaction(contour, hierarchy, min_area):
@@ -5,34 +5,29 @@ from funcy import lmap, flatten
 
 from cv_analysis.figure_detection.figure_detection import detect_figures
 from cv_analysis.table_parsing import parse_tables
-from cv_analysis.utils.structures import Rectangle
+from cv_analysis.utils.rectangle import Rectangle
 from pdf2img.conversion import convert_pages_to_images
 from pdf2img.default_objects.image import ImagePlus, ImageInfo
 from pdf2img.default_objects.rectangle import RectanglePlus
 
 
-def get_analysis_pipeline(operation, table_parsing_skip_pages_without_images):
-    if operation == "table":
-        return make_analysis_pipeline(
-            parse_tables,
-            table_parsing_formatter,
-            dpi=200,
-            skip_pages_without_images=table_parsing_skip_pages_without_images,
-        )
-    elif operation == "figure":
-        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200)
+def make_analysis_pipeline_for_element_type(segment_type, **kwargs):
+    if segment_type == "table":
+        return make_analysis_pipeline(parse_tables, table_parsing_formatter, dpi=200, **kwargs)
+    elif segment_type == "figure":
+        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200, **kwargs)
     else:
-        raise
+        raise ValueError(f"Unknown segment type {segment_type}.")
 
 
 def make_analysis_pipeline(analysis_fn, formatter, dpi, skip_pages_without_images=False):
-    def analyse_pipeline(pdf: bytes, index=None):
+    def analysis_pipeline(pdf: bytes, index=None):
         def parse_page(page: ImagePlus):
             image = page.asarray()
-            rects = analysis_fn(image)
-            if not rects:
+            rectangles = analysis_fn(image)
+            if not rectangles:
                 return
-            infos = formatter(rects, page, dpi)
+            infos = formatter(rectangles, page, dpi)
             return infos
 
         pages = convert_pages_to_images(pdf, index=index, dpi=dpi, skip_pages_without_images=skip_pages_without_images)
@@ -40,22 +35,26 @@ def make_analysis_pipeline(analysis_fn, formatter, dpi, skip_pages_without_image
 
         yield from flatten(filter(truth, results))
 
-    return analyse_pipeline
+    return analysis_pipeline
 
 
-def table_parsing_formatter(rects, page: ImagePlus, dpi):
-    def format_rect(rect: Rectangle):
-        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
-        return rect_plus.asdict(derotate=True)
+def table_parsing_formatter(rectangles, page: ImagePlus, dpi):
+    def format_rectangle(rectangle: Rectangle):
+        rectangle_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
+        return rectangle_plus.asdict(derotate=True)
 
-    bboxes = lmap(format_rect, rects)
+    bboxes = lmap(format_rectangle, rectangles)
 
     return {"pageInfo": page.asdict(natural_index=True), "tableCells": bboxes}
 
 
-def figure_detection_formatter(rects, page, dpi):
-    def format_rect(rect: Rectangle):
-        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
+def figure_detection_formatter(rectangles, page, dpi):
+    def format_rectangle(rectangle: Rectangle):
+        rect_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
         return asdict(ImageInfo(page.info, rect_plus.asbbox(derotate=False), rect_plus.alpha))
 
-    return lmap(format_rect, rects)
+    return lmap(format_rectangle, rectangles)
+
+
+def rectangle_to_xyxy(rectangle: Rectangle):
+    return rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y2
@@ -1,15 +1,11 @@
-from functools import partial
-from itertools import chain, starmap
-from operator import attrgetter
-
 import cv2
 import numpy as np
 from funcy import lmap, lfilter
 
 from cv_analysis.layout_parsing import parse_layout
-from cv_analysis.utils.postprocessing import remove_isolated  # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
-from cv_analysis.utils.structures import Rectangle
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.conversion import box_to_rectangle
+from cv_analysis.utils.postprocessing import remove_isolated
+from cv_analysis.utils.visual_logger import vizlogger
 
 
 def add_external_contours(image, image_h_w_lines_only):
@@ -31,8 +27,7 @@ def apply_motion_blur(image: np.array, angle, size=80):
         size (int): kernel size; 80 found empirically to work well
 
     Returns:
-        np.array
-
+        np.ndarray
     """
     k = np.zeros((size, size), dtype=np.float32)
     vizlogger.debug(k, "tables08_blur_kernel1.png")
@@ -55,10 +50,9 @@ def isolate_vertical_and_horizontal_components(img_bin):
 
     Args:
        img_bin (np.array): array corresponding to single binarized page image
-       bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables
 
    Returns:
-       np.array
+       np.ndarray
    """
    line_min_width = 48
    kernel_h = np.ones((1, line_min_width), np.uint8)
@@ -90,10 +84,9 @@ def find_table_layout_boxes(image: np.array):
    def is_large_enough(box):
        (x, y, w, h) = box
        if w * h >= 100000:
-            return Rectangle.from_xywh(box)
+            return box_to_rectangle(box)
 
    layout_boxes = parse_layout(image)
-    a = lmap(is_large_enough, layout_boxes)
    return lmap(is_large_enough, layout_boxes)
 
 
@@ -103,7 +96,7 @@ def preprocess(image: np.array):
    return ~image
 
 
-def turn_connected_components_into_rects(image: np.array):
+def turn_connected_components_into_rectangles(image: np.array):
    def is_large_enough(stat):
        x1, y1, w, h, area = stat
        return area > 2000 and w > 35 and h > 25
@@ -117,7 +110,7 @@ def turn_connected_components_into_rects(image: np.array):
        return []
 
 
-def parse_tables(image: np.array, show=False):
+def parse_tables(image: np.array):
    """Runs the full table parsing process.
 
    Args:
@@ -129,11 +122,8 @@ def parse_tables(image: np.array, show=False):
 
    image = preprocess(image)
    image = isolate_vertical_and_horizontal_components(image)
-    rects = turn_connected_components_into_rects(image)
-    #print(rects, "\n\n")
-    rects = list(map(Rectangle.from_xywh, rects))
-    #print(rects, "\n\n")
-    rects = remove_isolated(rects)
-    #print(rects, "\n\n")
-
-    return rects
+    boxes = turn_connected_components_into_rectangles(image)
+    rectangles = lmap(box_to_rectangle, boxes)
+    rectangles = remove_isolated(rectangles)
+
+    return rectangles
cv_analysis/utils/common.py (new file)
@@ -0,0 +1,51 @@
from functools import reduce
from typing import Iterable

import cv2
import numpy as np
from funcy import first

from cv_analysis.utils.rectangle import Rectangle


def find_contours_and_hierarchies(image):
    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours, first(hierarchies) if hierarchies is not None else None


def dilate_page_components(image: np.ndarray) -> np.ndarray:
    # FIXME: Parameterize via factory
    image = cv2.GaussianBlur(image, (7, 7), 0)
    # FIXME: Parameterize via factory
    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # FIXME: Parameterize via factory
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    # FIXME: Parameterize via factory
    dilate = cv2.dilate(thresh, kernel, iterations=4)
    return dilate


def normalize_to_gray_scale(image: np.ndarray) -> np.ndarray:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
    return image


def threshold_image(image: np.ndarray) -> np.ndarray:
    # FIXME: Parameterize via factory
    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
    return image


def invert_image(image: np.ndarray):
    return ~image


def fill_rectangles(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
    image = reduce(fill_in_component_area, rectangles, image)
    return image


def fill_in_component_area(image: np.ndarray, rectangle: Rectangle) -> np.ndarray:
    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (0, 0, 0), -1)
    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (255, 255, 255), 7)
    return image
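`find_contours_and_hierarchies` wraps `cv2.findContours` and unwraps the extra nesting level of the hierarchy array that `has_no_parent(hierarchy)` later indexes. A hedged sketch of what gets normalized, assuming OpenCV 4.x and a synthetic binary image:

```python
import cv2
import numpy as np

image = np.zeros((50, 50), dtype=np.uint8)
cv2.rectangle(image, (10, 10), (40, 40), 255, -1)  # one filled blob

# cv2.findContours returns the hierarchy with shape (1, N, 4); taking the first
# element (as the wrapper above does) yields the per-contour rows [next, prev,
# first_child, parent].
contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
print(len(contours), hierarchies.shape)  # 1 (1, 1, 4)
print(hierarchies[0][0])                 # [-1 -1 -1 -1]: no parent
```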
@@ -1,120 +0,0 @@ (file deleted)
from itertools import combinations, starmap, product
from typing import Iterable


def is_near_enough(rect_pair, max_gap=14):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]

    return any([abs(x1 - (x2 + w2)) <= max_gap,
                abs(x2 - (x1 + w1)) <= max_gap,
                abs(y2 - (y1 + h1)) <= max_gap,
                abs(y1 - (y2 + h2)) <= max_gap])


def is_overlapping(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    dx = min(x1 + w1, x2 + w2) - max(x1, x2)
    dy = min(y1 + h1, y2 + h2) - max(y1, y2)
    return True if (dx >= 0) and (dy >= 0) else False


def is_on_same_line(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    return any([any([abs(y1 - y2) <= 10,
                     abs(y1 + h1 - (y2 + h2)) <= 10]),
                any([y2 <= y1 and y1 + h1 <= y2 + h2,
                     y1 <= y2 and y2 + h2 <= y1 + h1])])


def has_correct_position1(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    return any([any([abs(x1 - x2) <= 10,
                     abs(y1 - y2) <= 10,
                     abs(x1 + w1 - (x2 + w2)) <= 10,
                     abs(y1 + h1 - (y2 + h2)) <= 10]),
                any([y2 <= y1 and y1 + h1 <= y2 + h2,
                     y1 <= y2 and y2 + h2 <= y1 + h1,
                     x2 <= x1 and x1 + w1 <= x2 + w2,
                     x1 <= x2 and x2 + w2 <= x1 + w1])])


def is_related(rect_pair):
    return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping(
        rect_pair)


def fuse_rects(rect1, rect2):
    if rect1 == rect2:
        return rect1
    x1, y1, w1, h1 = rect1
    x2, y2, w2, h2 = rect2

    topleft = list(min(product([x1, x2], [y1, y2])))
    bottomright = list(max(product([x1 + w1, x2 + w2], [y1 + h1, y2 + h2])))

    w = [bottomright[0] - topleft[0]]
    h = [bottomright[1] - topleft[1]]
    return tuple(topleft + w + h)


def rects_not_the_same(r):
    return r[0] != r[1]


def find_related_rects(rects):
    rect_pairs = list(filter(is_related, combinations(rects, 2)))
    rect_pairs = list(filter(rects_not_the_same, rect_pairs))
    if not rect_pairs:
        return [], rects
    rel_rects = list(set([rect for pair in rect_pairs for rect in pair]))
    unrel_rects = [rect for rect in rects if rect not in rel_rects]
    return rect_pairs, unrel_rects


def connect_related_rects(rects):
    rects_to_connect, rects_new = find_related_rects(rects)

    while len(rects_to_connect) > 0:
        rects_fused = list(starmap(fuse_rects, rects_to_connect))
        rects_fused = list(dict.fromkeys(rects_fused))

        if len(rects_fused) == 1:
            rects_new += rects_fused
            rects_fused = []

        rects_to_connect, connected_rects = find_related_rects(rects_fused)
        rects_new += connected_rects

        if len(rects_to_connect) > 1 and len(set(rects_to_connect)) == 1:
            rects_new.append(rects_fused[0])
            rects_to_connect = []

    return rects_new


def connect_related_rects2(rects: Iterable[tuple]):
    rects = list(rects)
    current_idx = 0

    while True:
        if current_idx + 1 >= len(rects) or len(rects) <= 1:
            break
        merge_happened = False
        current_rect = rects.pop(current_idx)
        for idx, maybe_related_rect in enumerate(rects):
            if is_related((current_rect, maybe_related_rect)):
                current_rect = fuse_rects(current_rect, maybe_related_rect)
                rects.pop(idx)
                merge_happened = True
                break
        rects.insert(0, current_rect)
        if not merge_happened:
            current_idx += 1
        elif merge_happened:
            current_idx = 0

    return rects
cv_analysis/utils/conversion.py (new file)
@@ -0,0 +1,47 @@
import json
from typing import Sequence, Union

import cv2
import numpy as np
from PIL import Image

from cv_analysis.utils.rectangle import Rectangle

Image_t = Union[Image.Image, np.ndarray]


def contour_to_rectangle(contour):
    return box_to_rectangle(cv2.boundingRect(contour))


def box_to_rectangle(box: Sequence[int]) -> Rectangle:
    x, y, w, h = box
    return Rectangle(x, y, x + w, y + h)


def rectangle_to_box(rectangle: Rectangle) -> Sequence[int]:
    return [rectangle.x1, rectangle.y1, rectangle.width, rectangle.height]


class RectangleJSONEncoder(json.JSONEncoder):
    def __init__(self, *args, **kwargs):
        json.JSONEncoder.__init__(self, *args, **kwargs)
        self._replacement_map = {}

    def default(self, o):
        if isinstance(o, Rectangle):
            return {"x1": o.x1, "x2": o.x2, "y1": o.y1, "y2": o.y2}
        else:
            return json.JSONEncoder.default(self, o)

    def encode(self, o):
        result = json.JSONEncoder.encode(self, o)
        return result


def normalize_image_format_to_array(image: Image_t):
    return np.array(image).astype(np.uint8) if isinstance(image, Image.Image) else image


def normalize_image_format_to_pil(image: Image_t):
    return Image.fromarray(image.astype(np.uint8)) if isinstance(image, np.ndarray) else image
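`box_to_rectangle` converts cv2-style (x, y, w, h) boxes into corner coordinates, and `RectangleJSONEncoder` serializes those corners. A usage sketch, assuming the cv_analysis package from this branch is importable:

```python
import json

from cv_analysis.utils.conversion import box_to_rectangle, RectangleJSONEncoder

rectangle = box_to_rectangle((10, 20, 30, 40))      # (x, y, w, h) -> corner form
print(rectangle)                                    # Rectangle(10, 20, 40, 60)
print(json.dumps(rectangle, cls=RectangleJSONEncoder))
# {"x1": 10, "x2": 40, "y1": 20, "y2": 60}
```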
@@ -1,33 +1,51 @@
 import cv2
+import numpy as np
+from PIL import Image
+from PIL.Image import Image as Image_t
 from matplotlib import pyplot as plt
 
+from cv_analysis.utils.conversion import normalize_image_format_to_array
 
-def show_image_cv2(image, maxdim=700):
+
+def show_image(image, backend="mpl", **kwargs):
+    image = normalize_image_format_to_array(image)
+    if backend == "mpl":
+        show_image_mpl(image, **kwargs)
+    elif backend == "cv2":
+        show_image_cv2(image, **kwargs)
+    elif backend == "pil":
+        Image.fromarray(image).show()
+    else:
+        raise ValueError(f"Unknown backend: {backend}")
+
+
+def show_image_cv2(image, maxdim=700, **kwargs):
     h, w, c = image.shape
     maxhw = max(h, w)
     if maxhw > maxdim:
         ratio = maxdim / maxhw
         h = int(h * ratio)
         w = int(w * ratio)
-        img = cv2.resize(image, (h, w))
+
+    img = cv2.resize(image, (h, w))
     cv2.imshow("", img)
     cv2.waitKey(0)
     cv2.destroyAllWindows()
 
 
-def show_image_mpl(image):
+def show_image_mpl(image, **kwargs):
+    if isinstance(image, Image_t):
+        # noinspection PyTypeChecker
+        image = np.array(image)
+    # noinspection PyArgumentList
+    assert image.max() <= 255
     fig, ax = plt.subplots(1, 1)
     fig.set_size_inches(20, 20)
+    assert image.dtype == np.uint8
     ax.imshow(image, cmap="gray")
+    ax.title.set_text(kwargs.get("title", ""))
     plt.show()
 
 
-def show_image(image, backend="m"):
-    if backend.startswith("m"):
-        show_image_mpl(image)
-    else:
-        show_image_cv2(image)
-
-
 def save_image(image, path):
     cv2.imwrite(path, image)
@@ -1,19 +1,23 @@
+from typing import Union
+
 import cv2
+import numpy as np
+from PIL import Image
 
 from cv_analysis.utils import copy_and_normalize_channels
 
 
-def draw_contours(image, contours, color=None, annotate=False):
+def draw_contours(image, contours):
 
     image = copy_and_normalize_channels(image)
 
-    for cont in contours:
-        cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
+    for contour in contours:
+        cv2.drawContours(image, contour, -1, (0, 255, 0), 4)
 
     return image
 
 
-def draw_rectangles(image, rectangles, color=None, annotate=False):
+def draw_rectangles(image: Union[np.ndarray, Image.Image], rectangles, color=None, annotate=False, filled=False):
     def annotate_rect(x, y, w, h):
         cv2.putText(
             image,
@@ -21,18 +25,18 @@ def draw_rectangles(image, rectangles, color=None, annotate=False):
             (x + (w // 2) - 12, y + (h // 2) + 9),
             cv2.FONT_HERSHEY_SIMPLEX,
             1,
-            (0, 255, 0),
+            (0, 255, 0, 255),
             2,
         )
 
     image = copy_and_normalize_channels(image)
 
     if not color:
-        color = (0, 255, 0)
+        color = (0, 255, 0, 255)
 
     for rect in rectangles:
         x, y, w, h = rect
-        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
+        cv2.rectangle(image, (x, y), (x + w, y + h), color, -1 if filled else 1)
         if annotate:
             annotate_rect(x, y, w, h)
 
@@ -5,7 +5,7 @@ def is_large_enough(cont, min_area):
     return cv2.contourArea(cont, False) > min_area
 
 
-def is_not_too_large(cnt, max_area):
+def is_small_enough(cnt, max_area):
     return cv2.contourArea(cnt, False) < max_area
 
 
cv_analysis/utils/geometric.py (new file)
@@ -0,0 +1,13 @@
from cv_analysis.utils.rectangle import Rectangle


def is_square_like(box: Rectangle):
    return box.width / box.height > 0.5 and box.height / box.width > 0.5


def is_wide(box: Rectangle):
    return box.width / box.height > 1.5


def is_tall(box: Rectangle):
    return box.height / box.width > 1.5
cv_analysis/utils/image_operations.py (new file)
@@ -0,0 +1,115 @@
from typing import Tuple

import cv2 as cv
import numpy as np
from PIL import ImageOps, Image
from loguru import logger

from cv_analysis.utils.conversion import normalize_image_format_to_pil

Color = Tuple[int, int, int]


def blur(image: np.ndarray):
    return cv.blur(image, (3, 3))


def sharpen(image: np.ndarray):
    return cv.filter2D(image, -1, np.array([[-1, -1, -1], [-1, 6, -1], [-1, -1, -1]]))


def overlay(images, mode=np.sum):
    assert mode in [np.sum, np.max]
    images = np.stack(list(images))
    image = mode(images, axis=0)
    image = (image / image.max() * 255).astype(np.uint8)
    return image


def tint_image(src, color="#FFFFFF"):
    src.load()
    r, g, b, alpha = src.split()
    gray = ImageOps.grayscale(src)
    result = ImageOps.colorize(gray, (0, 0, 0), color)
    result.putalpha(alpha)
    return result


def color_shift_array(image: np.ndarray, color: Color):
    """Creates a 3-tensor from a 2-tensor by stacking the 2-tensor three times weighted by the color tuple."""
    assert image.ndim == 3
    assert image.shape[-1] == 3
    assert isinstance(color, tuple)
    assert max(color) <= 255
    assert image.max() <= 255

    color = np.array(color)
    weights = color / color.sum() / 10
    assert max(weights) <= 1

    colored = (image * weights).astype(np.uint8)

    assert colored.shape == image.shape

    return colored


def superimpose(
    base_image: Image,
    image_to_superimpose: Image,
    crop_to_content=True,
    pad=True,
) -> Image:
    """Superimposes an image with transparency onto another image.

    Args:
        base_image: The page image.
        image_to_superimpose: The texture image.
        crop_to_content: If True, the texture will be cropped to content (i.e. the bounding box of all non-transparent
            parts of the texture image).
        pad: If True, the texture will be padded to the size of the page.

    Returns:
        Image where the texture is superimposed onto the page.
    """
    base_image = normalize_image_format_to_pil(base_image)
    image_to_superimpose = normalize_image_format_to_pil(image_to_superimpose)

    if crop_to_content:
        image_to_superimpose = image_to_superimpose.crop(image_to_superimpose.getbbox())

    if base_image.size != image_to_superimpose.size:
        logger.trace(f"Size of page and texture do not match: {base_image.size} != {image_to_superimpose.size}")
        if pad:
            logger.trace(f"Padding texture before pasting to fit size {base_image.size}")
            image_to_superimpose = pad_image_to_size(image_to_superimpose, base_image.size)
        else:
            logger.trace(f"Resizing texture before pasting to fit size {base_image.size}")
            image_to_superimpose = image_to_superimpose.resize(base_image.size)

    assert base_image.size == image_to_superimpose.size
    assert image_to_superimpose.mode == "RGBA"

    base_image.paste(image_to_superimpose, (0, 0), image_to_superimpose)
    return base_image


def pad_image_to_size(image: Image, size: Tuple[int, int]) -> Image:
    """Pads an image to a given size."""
    if image.size == size:
        return image

    if image.size[0] > size[0] or image.size[1] > size[1]:
        raise ValueError(f"Image size {image.size} is larger than target size {size}.")

    padded = Image.new(image.mode, size, color=255)

    pasting_coords = compute_pasting_coordinates(image, padded)
    assert image.mode == "RGBA"
    padded.paste(image, pasting_coords)
    return padded


def compute_pasting_coordinates(smaller: Image, larger: Image.Image):
    """Computes the coordinates for centrally pasting a smaller image onto a larger image."""
    return abs(larger.width - smaller.width) // 2, abs(larger.height - smaller.height) // 2
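`pad_image_to_size` centers the smaller image on the padded canvas, and `compute_pasting_coordinates` is just half the size difference per axis. A small worked example of that arithmetic with plain PIL, offered only as an illustration:

```python
from PIL import Image

# Paste offset = (|W_large - W_small| // 2, |H_large - H_small| // 2).
small = Image.new("RGBA", (30, 20))
large = Image.new("RGBA", (100, 50))

offset = (abs(large.width - small.width) // 2, abs(large.height - small.height) // 2)
print(offset)  # (35, 15): the 30x20 image ends up centered on the 100x50 canvas
```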
cv_analysis/utils/input.py (new file)
@@ -0,0 +1,29 @@
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_analysis_input_file(path_or_bytes, first_page=1, last_page=None):

    assert first_page > 0, "Page numbers are 1-based."
    assert last_page is None or last_page >= first_page, "last_page must be greater than or equal to first_page."

    last_page = last_page or first_page

    if type(path_or_bytes) == str:
        if path_or_bytes.lower().endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(path_or_bytes)]
        elif path_or_bytes.lower().endswith(".pdf"):
            pages = pdf2image.convert_from_path(path_or_bytes, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes: .png, .jpg, .jpeg, .pdf")
    elif type(path_or_bytes) == bytes:
        pages = pdf2image.convert_from_bytes(path_or_bytes, first_page=first_page, last_page=last_page)
    elif type(path_or_bytes) in {list, ndarray}:
        return path_or_bytes

    pages = [preprocess_page_array(array(p)) for p in pages]

    return pages
cv_analysis/utils/merging.py (new file)
@@ -0,0 +1,54 @@
from functools import reduce
from itertools import combinations
from typing import List, Tuple, Set

from funcy import all

from cv_analysis.utils import until, make_merger_sentinel
from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import related


def merge_related_rectangles(rectangles: List[Rectangle]) -> List[Rectangle]:
    """Merges rectangles that are related to each other, iterating on partial merge results until no more mergers are
    possible."""
    assert isinstance(rectangles, list)
    no_new_merges = make_merger_sentinel()
    return until(no_new_merges, merge_rectangles_once, rectangles)


def merge_rectangles_once(rectangles: List[Rectangle]) -> List[Rectangle]:
    """Merges rectangles that are related to each other, but does not iterate on the results."""
    rectangles = set(rectangles)
    merged, used = reduce(merge_if_related, combinations(rectangles, 2), (set(), set()))

    return list(merged | rectangles - used)


T = Tuple[Set[Rectangle], Set[Rectangle]]
V = Tuple[Rectangle, Rectangle]


def merge_if_related(merged_and_used_so_far: T, rectangle_pair: V) -> T:
    """Merges two rectangles if they are related, otherwise returns the accumulator unchanged."""
    alpha, beta = rectangle_pair
    merged, used = merged_and_used_so_far

    def unused(*args) -> bool:
        return not used & {*args}

    if all(unused, (alpha, beta)) and related(alpha, beta):
        return merged | {bounding_rect(alpha, beta)}, used | {alpha, beta}

    else:
        return merged, used


def bounding_rect(alpha: Rectangle, beta: Rectangle) -> Rectangle:
    """Returns the smallest rectangle that contains both rectangles."""
    return Rectangle(
        min(alpha.x1, beta.x1),
        min(alpha.y1, beta.y1),
        max(alpha.x2, beta.x2),
        max(alpha.y2, beta.y2),
    )
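`merge_rectangles_once` folds over all unordered pairs, merging each related, not-yet-used pair into its bounding rectangle and keeping untouched rectangles as they are. The following is a standalone sketch of that reduce pattern on plain (x1, y1, x2, y2) tuples; the `overlaps` predicate here merely stands in for `related()` from cv_analysis.utils.spacial, which is not shown in this diff:

```python
from functools import reduce
from itertools import combinations

def overlaps(a, b):
    # Simple open-interval overlap test, used here only as a stand-in predicate.
    return a[0] < b[2] and b[0] < a[2] and a[1] < b[3] and b[1] < a[3]

def bounding(a, b):
    return (min(a[0], b[0]), min(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3]))

def merge_if_related(acc, pair):
    merged, used = acc
    a, b = pair
    if not ({a, b} & used) and overlaps(a, b):
        return merged | {bounding(a, b)}, used | {a, b}
    return merged, used

rects = {(0, 0, 10, 10), (5, 5, 20, 20), (50, 50, 60, 60)}
merged, used = reduce(merge_if_related, combinations(rects, 2), (set(), set()))
print(merged | (rects - used))  # {(0, 0, 20, 20), (50, 50, 60, 60)}
```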
cv_analysis/utils/metrics.py (new file)
@@ -0,0 +1,56 @@
from functools import reduce
from operator import itemgetter
from typing import Iterable

import numpy as np
from funcy import lmap, lpluck, first

from cv_analysis.utils import lift
from cv_analysis.utils.rectangle import Rectangle


def compute_document_score(result_dict, ground_truth_dicts):

    extract_cells = lambda dicts: lpluck("cells", dicts["pages"])

    cells_per_ground_truth_page, cells_per_result_page = map(extract_cells, (ground_truth_dicts, result_dict))
    cells_on_page_to_rectangles = lift(rectangle_from_dict)
    cells_on_pages_to_rectangles = lift(cells_on_page_to_rectangles)

    rectangles_per_ground_truth_page, rectangles_per_result_page = map(
        cells_on_pages_to_rectangles, (cells_per_ground_truth_page, cells_per_result_page)
    )

    scores = lmap(compute_page_iou, rectangles_per_result_page, rectangles_per_ground_truth_page)

    n_cells_per_page = np.array(lmap(len, cells_per_ground_truth_page))
    document_score = np.average(scores, weights=n_cells_per_page / n_cells_per_page.sum())

    return document_score


def rectangle_from_dict(d):
    x1, y1, w, h = itemgetter("x", "y", "width", "height")(d)
    return Rectangle(x1, y1, x1 + w, y1 + h)


def compute_page_iou(predicted_rectangles: Iterable[Rectangle], true_rectangles: Iterable[Rectangle]):
    def find_best_iou(sum_so_far_and_candidate_rectangles, true_rectangle):
        sum_so_far, predicted_rectangles = sum_so_far_and_candidate_rectangles
        best_match, best_iou = find_max_overlap(true_rectangle, predicted_rectangles)
        return sum_so_far + best_iou, predicted_rectangles - {best_match}

    predicted_rectangles = set(predicted_rectangles)
    true_rectangles = set(true_rectangles)

    iou_sum = first(reduce(find_best_iou, true_rectangles, (0, predicted_rectangles)))
    normalizing_factor = 1 / max(len(predicted_rectangles), len(true_rectangles))
    score = normalizing_factor * iou_sum

    return score


def find_max_overlap(rectangle: Rectangle, candidate_rectangles: Iterable[Rectangle]):
    best_candidate_rectangle = max(candidate_rectangles, key=rectangle.iou)
    iou = rectangle.iou(best_candidate_rectangle)
    return best_candidate_rectangle, iou
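`compute_page_iou` greedily matches each ground-truth cell to its best remaining prediction by intersection over union and normalizes by the larger of the two counts. A worked example of a single IoU term, written here from scratch as an illustration rather than taken from the repository:

```python
# Boxes (0, 0, 10, 10) and (5, 0, 15, 10):
# intersection = 5 * 10 = 50, union = 100 + 100 - 50 = 150, IoU = 50 / 150.
def iou(a, b):
    ix = max(0, min(a[2], b[2]) - max(a[0], b[0]))
    iy = max(0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = ix * iy
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return inter / (area(a) + area(b) - inter)

print(iou((0, 0, 10, 10), (5, 0, 15, 10)))  # 0.3333...
```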
cv_analysis/utils/morphing.py (new file)
@@ -0,0 +1,38 @@
from typing import Tuple

from PIL import Image
from loguru import logger

from cv_analysis.utils.image_operations import compute_pasting_coordinates
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.content_rectangle import ContentRectangle


def shrink_rectangle(rectangle: Rectangle, factor: float) -> Rectangle:
    x1, y1, x2, y2 = compute_scaled_coordinates(rectangle, (1 - factor))

    logger.trace(f"Shrinking {rectangle} by {factor} to ({x1}, {y1}, {x2}, {y2}).")

    assert x1 >= rectangle.x1
    assert y1 >= rectangle.y1
    assert x2 <= rectangle.x2
    assert y2 <= rectangle.y2

    shrunk_rectangle = Rectangle(x1, y1, x2, y2)

    if isinstance(rectangle, ContentRectangle):  # TODO: Refactor
        shrunk_rectangle = ContentRectangle(*shrunk_rectangle.coords, rectangle.content)

    return shrunk_rectangle


def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int, int, int, int]:
    # FIXME: Refactor: Using image to compute coordinates is not clean
    image = Image.new("RGBA", (rectangle.width, rectangle.height))
    scaled = image.resize((int(rectangle.width * factor), int(rectangle.height * factor)))

    x1, y1 = compute_pasting_coordinates(scaled, image)
    x1 = rectangle.x1 + x1
    y1 = rectangle.y1 + y1
    x2, y2 = x1 + scaled.width, y1 + scaled.height
    return x1, y1, x2, y2
@@ -1,27 +0,0 @@ (file deleted)
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_pdf(pdf, first_page=0, last_page=None):

    first_page += 1
    last_page = None if last_page is None else last_page + 1

    if type(pdf) == str:
        if pdf.lower().endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(pdf)]
        elif pdf.lower().endswith(".pdf"):
            pages = pdf2image.convert_from_path(pdf, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes:\n\t.png\n\t.jpg\n\t.jpeg\n\t.pdf")
    elif type(pdf) == bytes:
        pages = pdf2image.convert_from_bytes(pdf, first_page=first_page, last_page=last_page)
    elif type(pdf) in {list, ndarray}:
        return pdf

    pages = [preprocess_page_array(array(p)) for p in pages]

    return pages
@@ -1,15 +1,17 @@
-from collections import namedtuple
 from functools import partial
 from itertools import starmap, compress
-from typing import Iterable, List
-
-from cv_analysis.utils.structures import Rectangle
+from typing import Iterable, List, Sequence
+
+from funcy import lremove
+
+from cv_analysis.utils.rectangle import Rectangle


 def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
     def overlap(a: Rectangle, rect2: Rectangle) -> float:
         return a.intersection(rect2) > 0

-    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> list:
+    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> bool:
         return not any(overlap(rect, rect2) for rect2 in rectangles if not rect == rect2)

     rectangles = list(filter(partial(does_not_overlap, rectangles=rectangles), rectangles))
@@ -17,15 +19,28 @@ def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:


 def remove_included(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
-    keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
-    return keep
+    rectangles_to_keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
+    return rectangles_to_keep
+
+
+def remove_small(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]:
+    min_width = page_width * min_percentage
+    min_height = page_height * min_percentage
+
+    def small(box: Rectangle):
+        return box.width < min_width or box.height < min_height
+
+    return lremove(small, boxes)


 def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
     def is_connected(rect: Rectangle, rectangles: Iterable[Rectangle]):
         return any(rect.adjacent(rect2) for rect2 in rectangles if not rect == rect2)

-    rectangles = list(filter(partial(is_connected, rectangles=list(rectangles)), rectangles))
+    if not isinstance(rectangles, list):
+        rectangles = list(rectangles)
+
+    rectangles = list(filter(partial(is_connected, rectangles=rectangles), rectangles))
     return rectangles
@@ -42,9 +57,9 @@ def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]
     return rectangles


-def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> List[Rectangle]:
+def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted: bool = True) -> List[Rectangle]:
     return (__remove_isolated_unsorted if input_unsorted else __remove_isolated_sorted)(rectangles)


-def has_no_parent(hierarchy):
+def has_no_parent(hierarchy: Sequence[int]) -> bool:
     return hierarchy[-1] <= 0
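A minimal usage sketch of the new filters, assuming they live in cv_analysis.utils.postprocessing as the imports later in this compare suggest; the box coordinates and page size are made up for illustration.

# Sketch only: coordinates and page size are invented.
from cv_analysis.utils.postprocessing import remove_included, remove_overlapping, remove_small
from cv_analysis.utils.rectangle import Rectangle

boxes = [Rectangle(0, 0, 100, 100), Rectangle(10, 10, 50, 50), Rectangle(400, 400, 405, 405)]

boxes = remove_included(boxes)                                   # drops the 10..50 box nested inside the 0..100 box
boxes = remove_small(boxes, page_width=500, page_height=500)     # drops boxes smaller than 13% of the page edge
boxes = remove_overlapping(boxes)                                # keeps only boxes that overlap no other box
print(boxes)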
cv_analysis/utils/rectangle.py  (new file, 85 lines)
@@ -0,0 +1,85 @@
# See https://stackoverflow.com/a/33533514
from __future__ import annotations

from typing import Iterable, Union

from funcy import identity

from cv_analysis.utils.spacial import adjacent, contains, intersection, iou, area, is_contained

Coord = Union[int, float]


class Rectangle:
    def __init__(self, x1, y1, x2, y2, discrete=True):
        """Creates a rectangle from two points."""
        nearest_valid = int if discrete else identity

        self.__x1 = nearest_valid(x1)
        self.__y1 = nearest_valid(y1)
        self.__x2 = nearest_valid(x2)
        self.__y2 = nearest_valid(y2)

    def __repr__(self):
        return f"Rectangle({self.x1}, {self.y1}, {self.x2}, {self.y2})"

    @property
    def x1(self):
        return self.__x1

    @property
    def x2(self):
        return self.__x2

    @property
    def y1(self):
        return self.__y1

    @property
    def y2(self):
        return self.__y2

    @property
    def width(self):
        return abs(self.x2 - self.x1)

    @property
    def height(self):
        return abs(self.y2 - self.y1)

    @property
    def coords(self):
        return [self.x1, self.y1, self.x2, self.y2]

    def __hash__(self):
        return hash((self.x1, self.y1, self.x2, self.y2))

    def __iter__(self):
        yield self.x1
        yield self.y1
        yield self.width
        yield self.height

    def area(self):
        """Calculates the area of this rectangle."""
        return area(self)

    def intersection(self, other):
        """Calculates the intersection of this and the given other rectangle."""
        return intersection(self, other)

    def iou(self, other: Rectangle):
        """Calculates the intersection over union of this and the given other rectangle."""
        return iou(self, other)

    def includes(self, other: Rectangle, tol=3):
        """Checks if this rectangle contains the given other."""
        return contains(self, other, tol)

    def is_included(self, rectangles: Iterable[Rectangle]):
        """Checks if this rectangle is contained by any of the given rectangles."""
        return is_contained(self, rectangles)

    def adjacent(self, other: Rectangle, tolerance=7):
        """Checks if this rectangle is adjacent to the given other."""
        return adjacent(self, other, tolerance)
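A small usage sketch of the new value-object Rectangle; the coordinates are invented for illustration and the comments state expected results under the spatial helpers above.

from cv_analysis.utils.rectangle import Rectangle

a = Rectangle(0, 0, 100, 100)
b = Rectangle(90, 10, 150, 60)

print(a.width, a.height)      # 100 100
print(a.intersection(b) > 0)  # True: the boxes overlap in a 10x50 strip
print(a.iou(b))               # intersection area divided by union area
print(a.includes(b))          # False: b extends past x=100
print(list(b))                # [x1, y1, width, height] via __iter__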
cv_analysis/utils/spacial.py  (new file, 286 lines)
@@ -0,0 +1,286 @@
# See https://stackoverflow.com/a/39757388
from __future__ import annotations

from functools import lru_cache
from operator import attrgetter
from typing import TYPE_CHECKING, Iterable

from funcy import juxt, rpartial, compose, lflatten, first, second

from cv_analysis.utils import lift

if TYPE_CHECKING:
    from cv_analysis.utils.rectangle import Rectangle


def adjacent(alpha: Rectangle, beta: Rectangle, tolerance=7, strict=False):
    """Checks if the two rectangles are adjacent to each other.

    Args:
        alpha: The first rectangle.
        beta: The second rectangle.
        tolerance: The maximum distance between the two rectangles.
        strict: If True, the rectangles must be adjacent along one axis and contained within the other axis.
            Else, the rectangles must be adjacent along one axis and overlapping the other axis.

    Returns:
        True if the two rectangles are adjacent to each other, False otherwise.
    """
    select_strictness_variant = first if strict else second
    test_candidates = [
        # b touches a's right edge
        (right_left_aligned_and_vertically_contained, right_left_aligned_and_vertically_overlapping),
        # b touches a's left edge
        (left_right_aligned_and_vertically_contained, left_right_aligned_and_vertically_overlapping),
        # b sits below a
        (bottom_top_aligned_and_horizontally_contained, bottom_top_aligned_and_horizontally_overlapping),
        # b sits above a
        (top_bottom_aligned_and_horizontally_contained, top_bottom_aligned_and_horizontally_overlapping),
    ]

    tests = map(select_strictness_variant, test_candidates)
    return any(juxt(*tests)(alpha, beta, tolerance))


def right_left_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is left of the other within a tolerance and also overlaps the other's y range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def left_right_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is right of the other within a tolerance and also overlaps the other's y range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def bottom_top_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is above the other within a tolerance and also overlaps the other's x range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def top_bottom_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is below the other within a tolerance and also overlaps the other's x range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def right_left_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is left of the other within a tolerance and also contains the other's y range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def left_right_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is right of the other within a tolerance and also contains the other's y range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def bottom_top_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is above the other within a tolerance and also contains the other's x range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def top_bottom_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is below the other within a tolerance and also contains the other's x range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
    axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
    axis_1_lower_bound, axis_1_upper_bound, tolerance,
):
    """Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
    axis.
    """
    return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
        axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
        axis_1_lower_bound, axis_1_upper_bound, tolerance, mode="overlapping",
    )


def adjacent_along_one_axis_and_contained_within_perpendicular_axis(
    axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
    axis_1_lower_bound, axis_1_upper_bound, tolerance,
):
    """Checks if two points are adjacent along one axis and two other points fall entirely within a range along the
    perpendicular axis.
    """
    return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
        axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
        axis_1_lower_bound, axis_1_upper_bound, tolerance, mode="contained",
    )


def adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
    axis_0_point_1, axis_1_point_2, axis_1_contained_point_1, axis_1_contained_point_2,
    axis_1_lower_bound, axis_1_upper_bound, tolerance, mode,
):
    """Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
    axis or are contained in that range, depending on the mode specified.
    """
    assert mode in ["overlapping", "contained"]
    quantifier = any if mode == "overlapping" else all
    return all(
        [
            abs(axis_0_point_1 - axis_1_point_2) <= tolerance,
            quantifier(
                [
                    axis_1_lower_bound <= p <= axis_1_upper_bound
                    for p in [axis_1_contained_point_1, axis_1_contained_point_2]
                ]
            ),
        ]
    )


def contains(alpha: Rectangle, beta: Rectangle, tol=3):
    """Checks if the first rectangle contains the second rectangle."""
    return (
        beta.x1 + tol >= alpha.x1
        and beta.y1 + tol >= alpha.y1
        and beta.x2 - tol <= alpha.x2
        and beta.y2 - tol <= alpha.y2
    )


def is_contained(rectangle: Rectangle, rectangles: Iterable[Rectangle]):
    """Checks if the rectangle is contained within any of the other rectangles."""
    other_rectangles = filter(lambda r: r != rectangle, rectangles)
    return any(map(rpartial(contains, rectangle), other_rectangles))


def iou(alpha: Rectangle, beta: Rectangle):
    """Calculates the intersection area over the union area of two rectangles."""
    return intersection(alpha, beta) / union(alpha, beta)


def area(rectangle: Rectangle):
    """Calculates the area of a rectangle."""
    return abs((rectangle.x2 - rectangle.x1) * (rectangle.y2 - rectangle.y1))


def union(alpha: Rectangle, beta: Rectangle):
    """Calculates the union area of two rectangles."""
    return area(alpha) + area(beta) - intersection(alpha, beta)


@lru_cache(maxsize=1000)
def intersection(alpha, beta):
    """Calculates the intersection of two rectangles."""
    return intersection_along_x_axis(alpha, beta) * intersection_along_y_axis(alpha, beta)


def intersection_along_x_axis(alpha, beta):
    """Calculates the intersection along the x-axis."""
    return intersection_along_axis(alpha, beta, "x")


def intersection_along_y_axis(alpha, beta):
    """Calculates the intersection along the y-axis."""
    return intersection_along_axis(alpha, beta, "y")


def intersection_along_axis(alpha, beta, axis):
    """Calculates the intersection along the given axis.

    Cases:
          a        b
        [-----]  (---)     ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = 0
          b        a
        (---)  [-----]     ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = 0
          a     b
        [--(----]----)     ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = (a2 - b1)
           a    b
        (-[---]----)       ==> [b1, a1, a2, b2] ==> max(0, (a2 - a1)) = (a2 - a1)
           b    a
        [-(---)----]       ==> [a1, b1, b2, a2] ==> max(0, (b2 - b1)) = (b2 - b1)
             b     a
        (----[--)----]     ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = (b2 - a1)
    """
    assert axis in ["x", "y"]

    def get_component_accessor(component):
        """Returns a function that accesses the given component of a rectangle."""
        return attrgetter(f"{axis}{component}")

    def make_access_components_and_sort_fn(component):
        """Returns a function that accesses and sorts the given component of multiple rectangles."""
        assert component in [1, 2]
        return compose(sorted, lift(get_component_accessor(component)))

    sort_first_components, sort_second_components = map(make_access_components_and_sort_fn, [1, 2])

    min_c1, max_c1, min_c2, max_c2 = lflatten(juxt(sort_first_components, sort_second_components)((alpha, beta)))
    intersection = max(0, min_c2 - max_c1)
    return intersection


def related(alpha: Rectangle, beta: Rectangle):
    return close(alpha, beta) or overlap(alpha, beta)


def close(alpha: Rectangle, beta: Rectangle, max_gap=14):
    # FIXME: Parameterize via factory
    return adjacent(alpha, beta, tolerance=max_gap, strict=True)


def overlap(alpha: Rectangle, beta: Rectangle):
    return intersection(alpha, beta) > 0
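A brief, hedged example of how the strict and non-strict adjacency variants differ; the coordinates are invented, and the stated results follow from the definitions above.

from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import adjacent, intersection

a = Rectangle(0, 0, 100, 100)
b = Rectangle(103, 20, 160, 80)    # 3px right of a, y-range fully inside a's
c = Rectangle(103, 50, 160, 140)   # 3px right of a, y-range only partially overlapping a's

print(adjacent(a, b, strict=True))   # True: edges within tolerance and b's y-range contained in a's
print(adjacent(a, c, strict=True))   # False: c's y-range is not fully contained
print(adjacent(a, c, strict=False))  # True: overlapping the y-range is enough
print(intersection(a, b))            # 0: adjacency does not require overlap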
@@ -1,131 +0,0 @@ (file deleted; its previous contents were:)
from json import dumps
from typing import Iterable

import numpy as np
from funcy import identity


class Rectangle:
    def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh", discrete=True):
        make_discrete = int if discrete else identity

        try:
            self.x1 = make_discrete(x1)
            self.y1 = make_discrete(y1)
            self.w = make_discrete(w) if w else make_discrete(x2 - x1)
            self.h = make_discrete(h) if h else make_discrete(y2 - y1)
            self.x2 = make_discrete(x2) if x2 else self.x1 + self.w
            self.y2 = make_discrete(y2) if y2 else self.y1 + self.h
            assert np.isclose(self.x1 + self.w, self.x2)
            assert np.isclose(self.y1 + self.h, self.y2)
            self.indent = indent
            self.format = format
        except Exception as err:
            raise Exception("x1, y1, (w|x2), and (h|y2) must be defined.") from err

    def json_xywh(self):
        return {"x": self.x1, "y": self.y1, "width": self.w, "height": self.h}

    def json_xyxy(self):
        return {"x1": self.x1, "y1": self.y1, "x2": self.x2, "y2": self.y2}

    def json_full(self):
        # TODO: can we make all coords x0, y0 based? :)
        return {
            "x0": self.x1,
            "y0": self.y1,
            "x1": self.x2,
            "y1": self.y2,
            "width": self.w,
            "height": self.h,
        }

    def json(self):
        json_func = {"xywh": self.json_xywh, "xyxy": self.json_xyxy}.get(self.format, self.json_full)
        return json_func()

    def xyxy(self):
        return self.x1, self.y1, self.x2, self.y2

    def xywh(self):
        return self.x1, self.y1, self.w, self.h

    def intersection(self, rect):
        bx1, by1, bx2, by2 = rect.xyxy()
        if (self.x1 > bx2) or (bx1 > self.x2) or (self.y1 > by2) or (by1 > self.y2):
            return 0
        intersection_ = (min(self.x2, bx2) - max(self.x1, bx1)) * (min(self.y2, by2) - max(self.y1, by1))
        return intersection_

    def area(self):
        return (self.x2 - self.x1) * (self.y2 - self.y1)

    def iou(self, rect):
        intersection = self.intersection(rect)
        if intersection == 0:
            return 0
        union = self.area() + rect.area() - intersection
        return intersection / union

    def includes(self, other: "Rectangle", tol=3):
        """does a include b?"""
        return (
            other.x1 + tol >= self.x1
            and other.y1 + tol >= self.y1
            and other.x2 - tol <= self.x2
            and other.y2 - tol <= self.y2
        )

    def is_included(self, rectangles: Iterable["Rectangle"]):
        return any(rect.includes(self) for rect in rectangles if not rect == self)

    def adjacent(self, rect2: "Rectangle", tolerance=7):
        # tolerance=1 was set too low; most lines are 2px wide
        def adjacent2d(sixtuple):
            g, h, i, j, k, l = sixtuple
            return (abs(g - h) <= tolerance) and any(k <= p <= l for p in [i, j])

        if rect2 is None:
            return False
        return any(
            map(
                adjacent2d,
                [
                    (self.x2, rect2.x1, rect2.y1, rect2.y2, self.y1, self.y2),
                    (self.x1, rect2.x2, rect2.y1, rect2.y2, self.y1, self.y2),
                    (self.y2, rect2.y1, rect2.x1, rect2.x2, self.x1, self.x2),
                    (self.y1, rect2.y2, rect2.x1, rect2.x2, self.x1, self.x2),
                ],
            )
        )

    @classmethod
    def from_xyxy(cls, xyxy_tuple, discrete=True):
        x1, y1, x2, y2 = xyxy_tuple
        return cls(x1=x1, y1=y1, x2=x2, y2=y2, discrete=discrete)

    @classmethod
    def from_xywh(cls, xywh_tuple, discrete=True):
        x, y, w, h = xywh_tuple
        return cls(x1=x, y1=y, w=w, h=h, discrete=discrete)

    @classmethod
    def from_dict_xywh(cls, xywh_dict, discrete=True):
        return cls(x1=xywh_dict["x"], y1=xywh_dict["y"], w=xywh_dict["width"], h=xywh_dict["height"], discrete=discrete)

    def __str__(self):
        return dumps(self.json(), indent=self.indent)

    def __repr__(self):
        return str(self.json())

    def __iter__(self):
        return list(self.json().values()).__iter__()

    def __eq__(self, rect):
        return all([self.x1 == rect.x1, self.y1 == rect.y1, self.w == rect.w, self.h == rect.h])


class Contour:
    def __init__(self):
        pass
@@ -1,61 +0,0 @@ (file deleted; its previous contents were:)
from typing import Iterable

import numpy as np

from cv_analysis.utils.structures import Rectangle


def find_max_overlap(box: Rectangle, box_list: Iterable[Rectangle]):
    best_candidate = max(box_list, key=lambda x: box.iou(x))
    iou = box.iou(best_candidate)
    return best_candidate, iou


def compute_page_iou(results_boxes: Iterable[Rectangle], ground_truth_boxes: Iterable[Rectangle]):
    results = list(results_boxes)
    truth = list(ground_truth_boxes)
    if (not results) or (not truth):
        return 0
    iou_sum = 0
    denominator = max(len(results), len(truth))
    while results and truth:
        gt_box = truth.pop()
        best_match, best_iou = find_max_overlap(gt_box, results)
        results.remove(best_match)
        iou_sum += best_iou
    score = iou_sum / denominator
    return score


def compute_document_score(results_dict, annotation_dict):

    page_weights = np.array([len(page["cells"]) for page in annotation_dict["pages"]])
    page_weights = page_weights / sum(page_weights)

    scores = []
    for i in range(len(annotation_dict["pages"])):
        scores.append(
            compute_page_iou(
                map(Rectangle.from_dict_xywh, results_dict["pages"][i]["cells"]),
                map(Rectangle.from_dict_xywh, annotation_dict["pages"][i]["cells"]),
            )
        )

    doc_score = np.average(np.array(scores), weights=page_weights)

    return doc_score


"""
from cv_analysis.utils.test_metrics import *

r1 = Rectangle.from_dict_xywh({'x': 30, 'y': 40, 'width': 50, 'height': 60})
r2 = Rectangle.from_dict_xywh({'x': 40, 'y': 30, 'width': 55, 'height': 65})
r3 = Rectangle.from_dict_xywh({'x': 45, 'y': 35, 'width': 45, 'height': 55})
r4 = Rectangle.from_dict_xywh({'x': 25, 'y': 45, 'width': 45, 'height': 55})
d1 = {"pages": [{"cells": [r1.json_xywh(), r2.json_xywh()]}]}
d2 = {"pages": [{"cells": [r3.json_xywh(), r4.json_xywh()]}]}

compute_iou_from_boxes(r1, r2)
find_max_overlap(r1, [r2, r3, r4])
compute_page_iou([r1, r2], [r3, r4])
compute_document_score(d1, d2)
"""
@@ -1,9 +1,19 @@
-from numpy import generic
+from __future__ import annotations
+
+import itertools
+
 import cv2
+import numpy as np
+from PIL import Image
+from funcy import first, iterate, keep, lmap, repeatedly
+from numpy import generic


 def copy_and_normalize_channels(image):
+    if isinstance(image, Image.Image):
+        image = np.array(image)
+
     image = image.copy()
     try:
         image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
@@ -17,3 +27,64 @@ def npconvert(ob):
     if isinstance(ob, generic):
         return ob.item()
     raise TypeError
+
+
+def lift(fn):
+    def lifted(coll):
+        yield from map(fn, coll)
+
+    return lifted
+
+
+def star(fn):
+    def starred(args):
+        return fn(*args)
+
+    return starred
+
+
+def lstarkeep(fn, coll):
+    return list(starkeep(fn, coll))
+
+
+def starkeep(fn, coll):
+    yield from keep(star(fn), coll)
+
+
+def until(cond, func, *args, **kwargs):
+    return first(filter(cond, iterate(func, *args, **kwargs)))
+
+
+def conj(x, xs):
+    return [x, *xs]
+
+
+def rconj(xs, x):
+    return [*xs, x]
+
+
+def make_merger_sentinel():
+    def no_new_mergers(records):
+        nonlocal number_of_records_so_far
+
+        number_of_records_now = len(records)
+
+        if number_of_records_now == number_of_records_so_far:
+            return True
+        else:
+            number_of_records_so_far = number_of_records_now
+            return False
+
+    number_of_records_so_far = -1
+
+    return no_new_mergers
+
+
+def zipmap(fn, boxes, n=2):
+    rets = lmap(list, zip(*map(fn, boxes)))
+    yield from repeatedly(lambda: [], n) if len(rets) < n else rets
+
+
+def every_nth(n, iterable):
+    return itertools.islice(iterable, 0, None, n)
+
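A hedged sketch of how the new functional helpers compose; merge_once is an invented stand-in for a single merging pass, used only to show the until/make_merger_sentinel loop reaching a fixed point.

from cv_analysis.utils import until, make_merger_sentinel, every_nth, lift

def merge_once(records):
    # placeholder: a real pass would merge related rectangles and return the new list
    return records[: max(1, len(records) - 1)]

records = list(range(10))
stable = until(make_merger_sentinel(), merge_once, records)  # repeat merge_once until the length stops changing
print(len(stable))                                           # 1

print(list(every_nth(3, "abcdefghij")))   # ['a', 'd', 'g', 'j']
print(list(lift(str.upper)("abc")))       # ['A', 'B', 'C']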
poetry.lock  (generated, 1169 lines changed)
File diff suppressed because it is too large.
@@ -36,6 +36,19 @@ loguru = "^0.6.0"
 pytest = "^7.0.1"


+[tool.poetry.group.test.dependencies]
+albumentations = "^1.3.0"
+faker = "^16.4.0"
+pandas = "^1.5.2"
+pytablewriter = "^0.64.2"
+dataframe-image = "^0.1.5"
+blend-modes = "^2.1.0"
+
+
+[tool.poetry.group.dev.dependencies]
+ipython = "^8.9.0"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
@@ -1,50 +1,75 @@
-"""
-Usage:
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type table --show
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type redaction --show
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type layout --show
-python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type figure --show
-"""
-
 import argparse

+import loguru
+
+from cv_analysis.figure_detection.figure_detection import detect_figures
+from cv_analysis.layout_parsing import parse_layout
+from cv_analysis.redaction_detection import find_redactions
+from cv_analysis.table_parsing import parse_tables
 from cv_analysis.utils.display import show_image
-from cv_analysis.utils.draw import draw_contours, draw_rectangles
-from cv_analysis.utils.open_pdf import open_pdf
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.drawing import draw_contours, draw_rectangles
+from cv_analysis.utils.input import open_analysis_input_file


 def parse_args():
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description="Annotate PDF pages with detected elements. Specified pages form a closed interval and are 1-based."
+    )
     parser.add_argument("pdf_path")
-    parser.add_argument("--page_index", type=int, default=0)
-    parser.add_argument("--type", choices=["table", "redaction", "layout", "figure"], default="table")
-    parser.add_argument("--show", action="store_true", default=False)
+    parser.add_argument(
+        "--first_page",
+        "-f",
+        type=int,
+        default=1,
+    )
+    parser.add_argument(
+        "-last_page",
+        "-l",
+        help="if not specified, defaults to the value of the first page specified",
+        type=int,
+        default=None,
+    )
+    parser.add_argument(
+        "--type",
+        "-t",
+        help="element type to look for and analyze",
+        choices=["table", "redaction", "layout", "figure"],
+        default="table",
+    )
+    parser.add_argument("--page", "-p", type=int, default=1)
     args = parser.parse_args()
     return args


-def annotate_page(page_image, analysis_function, drawing_function, name="tmp.png", show=True):
-    result = analysis_function(page_image)
-    page_image = drawing_function(page_image, result)
-    vizlogger.debug(page_image, name)
+def annotate_page(page_image, analysis_fn, draw_fn):
+    result = analysis_fn(page_image)
+    page_image = draw_fn(page_image, result)
     show_image(page_image)


-if __name__ == "__main__":
-    args = parse_args()
-    page = open_pdf(args.pdf_path, first_page=args.page_index, last_page=args.page_index)[0]
-    name = f"{args.type}_final_result.png"
-    draw = draw_rectangles
-    if args.type == "table":
-        from cv_analysis.table_parsing import parse_tables as analyze
-    elif args.type == "redaction":
-        from cv_analysis.redaction_detection import find_redactions as analyze
-        draw = draw_contours
-    elif args.type == "layout":
-        from cv_analysis.layout_parsing import parse_layout as analyze
-    elif args.type == "figure":
-        from cv_analysis.figure_detection.figure_detection import detect_figures
-        analyze = detect_figures
-    annotate_page(page, analyze, draw, name=name, show=args.show)
+def get_analysis_and_draw_fn_for_type(element_type):
+    analysis_fn, draw_fn = {
+        "table": (parse_tables, draw_rectangles),
+        "redaction": (find_redactions, draw_contours),
+        "layout": (parse_layout, draw_rectangles),
+        "figure": (detect_figures, draw_rectangles),
+    }[element_type]
+
+    return analysis_fn, draw_fn
+
+
+def main(args):
+    loguru.logger.info(f"Annotating {args.type}s in {args.pdf_path}...")
+    pages = open_analysis_input_file(args.pdf_path, first_page=args.first_page, last_page=args.last_page)
+
+    for page in pages:
+        analysis_fn, draw_fn = get_analysis_and_draw_fn_for_type(args.type)
+        annotate_page(page, analysis_fn, draw_fn)
+
+
+if __name__ == "__main__":
+    try:
+        main(parse_args())
+    except KeyboardInterrupt:
+        pass
@@ -10,7 +10,7 @@ from funcy import lmap
 from cv_analysis.figure_detection.figure_detection import detect_figures
 from cv_analysis.layout_parsing import parse_layout
 from cv_analysis.table_parsing import parse_tables
-from cv_analysis.utils.draw import draw_rectangles
+from cv_analysis.utils.drawing import draw_rectangles
 from pdf2img.conversion import convert_pages_to_images

@@ -2,28 +2,27 @@ import argparse
 import json
 from pathlib import Path

-from cv_analysis.server.pipeline import get_analysis_pipeline
+from loguru import logger
+
+from cv_analysis.server.pipeline import make_analysis_pipeline_for_element_type


 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("pdf")
-    parser.add_argument("--type", "-t", choices=["table", "layout", "figure"], required=True)
+    parser.add_argument("pdf", type=Path)
+    parser.add_argument("--element_type", "-t", choices=["table", "figure"], required=True)
     return parser.parse_args()


+def main(args):
+
+    analysis_fn = make_analysis_pipeline_for_element_type(args.element_type)
+
+    logger.info(f"Analysing document for {args.element_type}s...")
+    results = list(analysis_fn(args.pdf.read_bytes()))
+
+    print(json.dumps(results, indent=2))
+
+
 if __name__ == "__main__":
-    args = parse_args()
-
-    analysis_fn = get_analysis_pipeline(args.type)
-
-    with open(args.pdf, "rb") as f:
-        pdf_bytes = f.read()
-
-    results = list(analysis_fn(pdf_bytes))
-
-    folder = Path(args.pdf).parent
-    file_stem = Path(args.pdf).stem
-
-    with open(f"{folder}/{file_stem}_{args.type}.json", "w+") as f:
-        json.dump(results, f, indent=2)
+    main(parse_args())
@@ -4,7 +4,7 @@ import logging
 from operator import itemgetter

 from cv_analysis.config import get_config
-from cv_analysis.server.pipeline import get_analysis_pipeline
+from cv_analysis.server.pipeline import make_analysis_pipeline_for_segment_type
 from cv_analysis.utils.banner import make_art
 from pyinfra import config as pyinfra_config
 from pyinfra.queue.queue_manager import QueueManager
@@ -31,7 +31,10 @@ def analysis_callback(queue_message: dict):
     should_publish_result = True

     object_bytes = gzip.decompress(storage.get_object(bucket, object_name))
-    analysis_fn = get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images)
+    analysis_fn = make_analysis_pipeline_for_segment_type(
+        operation,
+        skip_pages_without_images=CV_CONFIG.table_parsing_skip_pages_without_images,
+    )

     results = analysis_fn(object_bytes)
     response = {**queue_message, "data": list(results)}
synthesis/__init__.py  (new file, 17 lines)
@@ -0,0 +1,17 @@
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument()
    args = parser.parse_args()

    return args


def main(args):
    pass


if __name__ == "__main__":
    main(parse_args())
synthesis/content_generator.py  (new file, 47 lines)
@@ -0,0 +1,47 @@
import itertools
from typing import List

from PIL import Image
from funcy import lsplit, lfilter

from cv_analysis.utils import every_nth, zipmap
from cv_analysis.utils.geometric import is_square_like
from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.postprocessing import remove_included, remove_overlapping
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd
from synthesis.segment.segments import (
    generate_random_text_block,
    generate_recursive_random_table_with_caption,
    generate_random_plot_with_caption,
)


class ContentGenerator:
    def __init__(self):
        self.constrain_layouts = True

    def __call__(self, boxes: List[Rectangle]) -> Image:
        rnd.shuffle(boxes)

        figure_boxes, text_boxes = lsplit(is_square_like, boxes)

        if self.constrain_layouts:
            figure_boxes = merge_related_rectangles(figure_boxes)
            figure_boxes = lfilter(is_square_like, figure_boxes)
            text_boxes = merge_related_rectangles(text_boxes)

        boxes = list(
            itertools.chain(
                map(generate_random_text_block, every_nth(2, text_boxes)),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
                *zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
                *zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
            )
        )

        if self.constrain_layouts:
            boxes = remove_included(boxes)
            boxes = remove_overlapping(boxes)

        return boxes
synthesis/partitioner/__init__.py  (new file, empty)

synthesis/partitioner/page_partitioner.py  (new file, 71 lines)
@@ -0,0 +1,71 @@
import abc
from typing import List, Tuple

from PIL import Image
from funcy import lflatten

from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd


class PagePartitioner(abc.ABC):
    # TODO: produce boxes for page numbers, headers and footers
    def __init__(self):
        self.left_margin_percentage = 0.05
        self.right_margin_percentage = 0.05
        self.top_margin_percentage = 0.1
        self.bottom_margin_percentage = 0.1

        self.recursive_margin_percentage = 0.007
        self.max_recursion_depth = 3
        self.initial_recursion_probability = 1
        self.recursion_probability_decay = 0.1

    def __call__(self, page: Image.Image) -> List[Rectangle]:
        left_margin = int(page.width * self.left_margin_percentage)
        right_margin = int(page.width * self.right_margin_percentage)
        top_margin = int(page.height * self.top_margin_percentage)
        bottom_margin = int(page.height * self.bottom_margin_percentage)

        box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin)
        boxes = lflatten(self.generate_content_boxes(box))
        return boxes

    @abc.abstractmethod
    def generate_content_boxes(self, box: Rectangle, depth=0):
        raise NotImplementedError

    def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
        assert axis in ["x", "y"]

        edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height)
        split_coordinate = split_percentage * edge_length + edge_anchor_point
        child_boxes = get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)
        return child_boxes

    def recurse(self, depth):
        return rnd.random() <= self.recursion_probability(depth)

    def recursion_probability(self, depth):
        return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth


def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
    assert axis in ["x", "y"]

    def low(point_1d):
        return point_1d * (1 + margin_percentage)

    def high(point_1d):
        return point_1d * (1 - margin_percentage)

    if axis == "x":
        return (
            Rectangle(low(box.x1), low(box.y1), high(split_coordinate), high(box.y2)),
            Rectangle(low(split_coordinate), low(box.y1), high(box.x2), high(box.y2)),
        )
    else:
        return (
            Rectangle(low(box.x1), low(box.y1), high(box.x2), high(split_coordinate)),
            Rectangle(low(box.x1), low(split_coordinate), high(box.x2), high(box.y2)),
        )
synthesis/partitioner/random.py  (new file, 22 lines)
@@ -0,0 +1,22 @@
from cv_analysis.utils.rectangle import Rectangle
from synthesis.partitioner.page_partitioner import PagePartitioner
from synthesis.random import rnd


class RandomPagePartitioner(PagePartitioner):
    def __init__(self):
        super().__init__()

    def generate_content_boxes(self, box: Rectangle, depth=0):
        if depth >= self.max_recursion_depth:
            yield box
        else:
            child_boxes = self.generate_child_boxes(
                box,
                axis=rnd.choice(["x", "y"]),
                split_percentage=rnd.uniform(0.3, 0.7),
            )
            if self.recurse(depth):
                yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
            else:
                yield child_boxes
synthesis/partitioner/two_column.py  (new file, 25 lines)
@@ -0,0 +1,25 @@
from cv_analysis.utils.rectangle import Rectangle
from synthesis.partitioner.page_partitioner import PagePartitioner
from synthesis.random import rnd


class TwoColumnPagePartitioner(PagePartitioner):
    def __init__(self):
        super().__init__()
        self.max_recursion_depth = 3

    def generate_content_boxes(self, box: Rectangle, depth=0):
        if depth >= self.max_recursion_depth:
            yield box

        else:
            if depth == 0:
                axis = "x"
                split_percentage = 0.5
            else:
                axis = "y"
                split_percentage = rnd.choice([0.3, 0.7])

            child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)

            yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
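A hedged sketch of how the partitioners and the ContentGenerator appear intended to compose: partition a blank page into layout boxes, then fill them with random segments. The blank page and page size are invented, and rendering the returned content rectangles back onto the page is not shown here.

# Sketch only; requires the synthesis package (fonts, matplotlib, text generators) to be importable.
from PIL import Image

from synthesis.content_generator import ContentGenerator
from synthesis.partitioner.two_column import TwoColumnPagePartitioner

page = Image.new("RGB", (2480, 3508), "white")   # roughly an A4 page at 300 dpi

boxes = TwoColumnPagePartitioner()(page)         # layout rectangles honouring the configured margins
segments = ContentGenerator()(boxes)             # ContentRectangles carrying rendered text, tables, and plots

print(len(boxes), len(segments))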
synthesis/random.py  (new file, 34 lines)
@@ -0,0 +1,34 @@
import random
from functools import lru_cache

from loguru import logger

random_seed = random.randint(0, 2**32 - 1)
# random_seed = 2973413116
# random_seed = 2212357755

# random_seed = 2987558464  # light green

# random_seed = 1173898033  # strange bar plot

# 2467967671

logger.info(f"Random seed: {random_seed}")
rnd = random.Random(random_seed)


def maybe():
    return rnd.random() > 0.9


def possibly():
    return rnd.random() > 0.5


def probably():
    return rnd.random() > 0.4


@lru_cache(maxsize=None)
def get_random_seed():
    return rnd.randint(0, 2**32 - 1)
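A small illustration of the seeding behaviour: because get_random_seed is cached with maxsize=None, every caller receives the same derived seed within one run, which keeps the segment generators reproducible relative to the logged top-level seed. The printed values are only expectations, not guarantees.

from synthesis.random import get_random_seed, probably

print(get_random_seed() == get_random_seed())  # True: the derived seed is cached for the whole run
print(probably())                              # True roughly 60% of the time under the logged seed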
synthesis/segment/__init__.py  (new file, 17 lines)
@@ -0,0 +1,17 @@
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument()
    args = parser.parse_args()

    return args


def main(args):
    pass


if __name__ == "__main__":
    main(parse_args())
synthesis/segment/content_rectangle.py  (new file, 10 lines)
@@ -0,0 +1,10 @@
from cv_analysis.utils.rectangle import Rectangle


class ContentRectangle(Rectangle):
    def __init__(self, x1, y1, x2, y2, content=None):
        super().__init__(x1, y1, x2, y2)
        self.content = content

    def __repr__(self):
        return f"{self.__class__.__name__}({self.x1}, {self.y1}, {self.x2}, {self.y2}, content={self.content})"
synthesis/segment/plot.py  (new file, 192 lines)
@@ -0,0 +1,192 @@
import io
import random
from functools import lru_cache, partial

import loguru
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap

from cv_analysis.utils.geometric import is_square_like, is_wide, is_tall
from cv_analysis.utils.image_operations import superimpose
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd, probably, maybe
from synthesis.segment.random_content_rectangle import RandomContentRectangle
from synthesis.text.text import generate_random_words


class RandomPlot(RandomContentRectangle):
    def __init__(self, x1, y1, x2, y2, seed=None):
        super().__init__(x1, y1, x2, y2, seed=seed)

        self.cmap = pick_colormap()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_plot(self, rectangle: Rectangle):

        if is_square_like(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_bar_plot,
                    self.generate_random_scatter_plot,
                    self.generate_random_histogram,
                    self.generate_random_pie_chart,
                ]
            )
        elif is_wide(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_line_plot,
                    self.generate_random_histogram,
                    self.generate_random_bar_plot,
                ]
            )
        elif is_tall(rectangle):
            plt_fn = rnd.choice(
                [
                    self.generate_random_bar_plot,
                    self.generate_random_histogram,
                ]
            )
        else:
            plt_fn = self.generate_random_scatter_plot

        plt_fn(rectangle)

    def generate_random_bar_plot(self, rectangle: Rectangle):
        x = sorted(np.random.randint(low=1, high=11, size=5))
        y = np.random.randint(low=1, high=11, size=5)
        bar_fn = partial(
            plt.bar,
            log=random.choice([True, False]),
        )
        self.__generate_random_plot(bar_fn, rectangle, x, y)

    def generate_random_line_plot(self, rectangle: Rectangle):
        f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])

        x = np.linspace(0, 10, 100)
        y = f(x)

        plot_fn = partial(
            plt.plot,
        )

        self.__generate_random_plot(plot_fn, rectangle, x, y)

    def generate_random_scatter_plot(self, rectangle: Rectangle):
        n = rnd.randint(10, 40)
        x = np.random.normal(size=n)
        y = np.random.normal(size=n)
        scatter_fn = partial(
            plt.scatter,
            cmap=self.cmap,
            marker=rnd.choice(["o", "*", "+", "x"]),
        )

        self.__generate_random_plot(scatter_fn, rectangle, x, y)

    def generate_random_histogram(self, rectangle: Rectangle):
        x = np.random.normal(size=100)
        hist_fn = partial(
            plt.hist,
            orientation=random.choice(["horizontal", "vertical"]),
            histtype=random.choice(["bar", "barstacked", "step", "stepfilled"]),
            log=random.choice([True, False]),
            stacked=random.choice([True, False]),
            density=random.choice([True, False]),
            cumulative=random.choice([True, False]),
        )
        self.__generate_random_plot(hist_fn, rectangle, x, random.randint(5, 20))

    def generate_random_pie_chart(self, rectangle: Rectangle):

        n = random.randint(3, 7)
        x = np.random.uniform(size=n)
        pie_fn = partial(
            plt.pie,
            shadow=True,
            startangle=90,
            pctdistance=0.85,
            labeldistance=1.1,
            colors=self.cmap(np.linspace(0, 1, 10)),
        )
        self.__generate_random_plot(
            pie_fn,
            rectangle,
            x,
            np.random.uniform(0, 0.1, size=n),
            plot_kwargs=self.generate_plot_kwargs(keywords=["a"]),
        )

    def generate_plot_kwargs(self, keywords=None):

        kwargs = {
            "color": rnd.choice(self.cmap.colors),
            "linestyle": rnd.choice(["-", "--", "-.", ":"]),
            "linewidth": rnd.uniform(1, 4),
        }

        return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords}

    def __generate_random_plot(self, plot_fn, rectangle: Rectangle, x, y, plot_kwargs=None):

        plot_kwargs = self.generate_plot_kwargs() if plot_kwargs is None else plot_kwargs

        fig, ax = plt.subplots()
        fig.set_size_inches(rectangle.width / 100, rectangle.height / 100)
        fig.tight_layout(pad=0)

        plot_fn(x, y, **plot_kwargs)
        ax.set_facecolor("none")

        probably() and ax.set_title(generate_random_words(1, 3))

        # disable axes at random
        maybe() and ax.set_xticks([])
        maybe() and ax.set_yticks([])
        maybe() and ax.set_xticklabels([])
        maybe() and ax.set_yticklabels([])
        maybe() and ax.set_xlabel("")
        maybe() and ax.set_ylabel("")
        maybe() and ax.set_title("")
        maybe() and ax.set_frame_on(False)

        # remove spines at random
        maybe() and (ax.spines["top"].set_visible(False) or ax.spines["right"].set_visible(False))

        image = dump_plt_to_image(rectangle)
        assert image.mode == "RGBA"

        self.content = image if not self.content else superimpose(self.content, image)


@lru_cache(maxsize=None)
def pick_colormap() -> ListedColormap:
    cmap_name = rnd.choice(
        [
            "viridis",
            "plasma",
            "inferno",
            "magma",
            "cividis",
        ],
    )
    loguru.logger.info(f"Using colormap {cmap_name}")
    cmap = plt.get_cmap(cmap_name)
    return cmap


def dump_plt_to_image(rectangle):
    buf = io.BytesIO()
    plt.savefig(buf, format="png", transparent=True)
    buf.seek(0)
    image = Image.open(buf)
    image = image.resize((rectangle.width, rectangle.height))
    buf.close()
    plt.close()
    return image
synthesis/segment/random_content_rectangle.py  (new file, 11 lines)
@@ -0,0 +1,11 @@
import random

from synthesis.random import get_random_seed
from synthesis.segment.content_rectangle import ContentRectangle


class RandomContentRectangle(ContentRectangle):
    def __init__(self, x1, y1, x2, y2, content=None, seed=None):
        super().__init__(x1, y1, x2, y2, content)
        self.seed = seed or get_random_seed()
        self.random = random.Random(self.seed)
102
synthesis/segment/segments.py
Normal file
@ -0,0 +1,102 @@
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import probably, rnd
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.plot import RandomPlot
from synthesis.segment.text_block import TextBlock
from synthesis.text.font import pick_random_font_available_on_system
from synthesis.text.text_block_generator.caption import CaptionGenerator


def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
    block = RandomPlot(*rectangle.coords)
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.generate_random_plot(rectangle)
    return block


def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRectangle:
    from synthesis.segment.table.table import RecursiveRandomTable

    block = RecursiveRandomTable(*rectangle.coords, **kwargs)
    if isinstance(rectangle, RecursiveRandomTable):
        block.content = rectangle.content if rectangle.content else None  # TODO: Refactor
    block.generate_random_table()
    return block


def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
    block = TextBlock(
        *rectangle.coords,
        font=pick_random_font_available_on_system(
            includes=("serif", "sans-serif", "bold"),
            excludes=("mono", "italic", "oblique", "cursive"),
        ),
        font_size=30,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.put_text(text, rectangle)
    return block


def generate_random_plot_with_caption(rectangle: Rectangle):
    # TODO: deduplicate with generate_random_table_with_caption
    plot_box, caption_box = split_into_figure_and_caption(rectangle)
    plot_box = generate_random_plot(plot_box)
    caption_box = generate_random_image_caption(caption_box)
    return plot_box, caption_box


def generate_recursive_random_table_with_caption(rectangle: Rectangle):
    table_box, caption_box = split_into_figure_and_caption(rectangle)
    table_box = generate_recursive_random_table(table_box, double_rule=probably())
    caption_box = generate_random_table_caption(caption_box)
    return table_box, caption_box


def split_into_figure_and_caption(rectangle: Rectangle):
    gap_percentage = rnd.uniform(0, 0.03)
    split_point = rnd.uniform(0.5, 0.9)
    figure_box = Rectangle(
        rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
    )
    caption_box = Rectangle(
        rectangle.x1, rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2), rectangle.x2, rectangle.y2
    )
    return figure_box, caption_box


def generate_random_text_block(rectangle: Rectangle, n_sentences=3000) -> ContentRectangle:
    block = TextBlock(
        *rectangle.coords,
        font=pick_random_font_available_on_system(
            includes=("serif", "sans-serif"),
            excludes=("bold", "mono", "italic", "oblique", "cursive"),
        ),
        font_size=30,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.generate_random_text(rectangle, n_sentences)
    return block


def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
    return generate_random_caption(rectangle, f"Fig {rnd.randint(1, 20)}")


def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
    return generate_random_caption(rectangle, f"Tabl {rnd.randint(1, 20)}")


def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=1000) -> ContentRectangle:
    block = TextBlock(
        *rectangle.coords,
        text_generator=CaptionGenerator(caption_start=caption_start),
        font=pick_random_font_available_on_system(
            includes=("italic",),
            excludes=("bold", "mono"),
        ),
        font_size=100,  # TODO: De-hardcode font size... Seems to have no effect on top of that
    )
    block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None  # TODO: Refactor
    block.generate_random_text(rectangle, n_sentences)
    return block
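A hedged usage sketch for the helpers above (not part of the changeset; the region size and output file names are made up, and it assumes `Rectangle(x1, y1, x2, y2)` with `.coords`, `.width`, `.height` and an RGBA `.content` exactly as used in this module):

# Illustrative only; coordinates and file names are invented for the example.
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.segments import generate_random_plot_with_caption

# Split a 600x800 region into a figure box and a caption box, then fill both.
region = Rectangle(0, 0, 600, 800)
plot_box, caption_box = generate_random_plot_with_caption(region)

# Each returned ContentRectangle carries its rendered content as an RGBA image.
plot_box.content.save("plot.png")
caption_box.content.save("caption.png")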
0
synthesis/segment/table/__init__.py
Normal file
81
synthesis/segment/table/cell.py
Normal file
@ -0,0 +1,81 @@
from PIL import Image, ImageDraw

from cv_analysis.utils.image_operations import superimpose
from synthesis.segment.content_rectangle import ContentRectangle


class Cell(ContentRectangle):
    def __init__(self, x1, y1, x2, y2, color=None):
        super().__init__(x1, y1, x2, y2)

        self.background_color = color or (255, 255, 255, 0)

        # to debug use random border color: tuple([random.randint(100, 200) for _ in range(3)] + [255])
        self.cell_border_color = (0, 0, 0, 255)

        self.border_width = 1
        self.inset = 1

        self.content = Image.new("RGBA", (self.width, self.height))
        self.fill()

    def draw_top_border(self, width=None):
        self.draw_line((0, 0, self.width - self.inset, 0), width=width)
        return self

    def draw_bottom_border(self, width=None):
        self.draw_line((0, self.height - self.inset, self.width - self.inset, self.height - self.inset), width=width)
        return self

    def draw_left_border(self, width=None):
        self.draw_line((0, 0, 0, self.height), width=width)
        return self

    def draw_right_border(self, width=None):
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, self.height), width=width)
        return self

    def draw_line(self, points, width=None):
        width = width or self.border_width
        draw = ImageDraw.Draw(self.content)
        draw.line(points, width=width, fill=self.cell_border_color)
        return self

    def draw(self, width=None):
        self.draw_top_border(width=width)
        self.draw_bottom_border(width=width)
        self.draw_left_border(width=width)
        self.draw_right_border(width=width)
        return self

    def draw_top_left_corner(self, width=None):
        self.draw_line((0, 0, 0, 0), width=width)
        self.draw_line((0, 0, 0, 0), width=width)
        return self

    def draw_top_right_corner(self, width=None):
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, 0), width=width)
        self.draw_line((self.width - self.inset, 0, self.width - self.inset, 0), width=width)
        return self

    def draw_bottom_left_corner(self, width=None):
        self.draw_line((0, self.height - self.inset, 0, self.height - self.inset), width=width)
        self.draw_line((0, self.height - self.inset, 0, self.height - self.inset), width=width)
        return self

    def draw_bottom_right_corner(self, width=None):
        self.draw_line(
            (self.width - self.inset, self.height - self.inset, self.width - self.inset, self.height - self.inset),
            width=width,
        )
        self.draw_line(
            (self.width - self.inset, self.height - self.inset, self.width - self.inset, self.height - self.inset),
            width=width,
        )
        return self

    def fill(self, color=None):
        color = color or self.background_color
        image = Image.new("RGBA", (self.width, self.height), color=color)
        self.content = superimpose(image, self.content)
        return self
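A short sketch of drawing a Cell on its own (not part of the changeset; the coordinates and colors are illustrative). It relies only on what the class above defines: the cell fills itself on construction and the draw_* methods chain by returning self:

# Illustrative only; coordinates and the background color are made up.
from synthesis.segment.table.cell import Cell

cell = Cell(0, 0, 120, 40, color=(240, 240, 240, 255))
# Draw only the right and bottom rules, as the table code does for inner cells.
cell.draw_right_border().draw_bottom_border()
cell.content.save("cell.png")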
279
synthesis/segment/table/table.py
Normal file
@ -0,0 +1,279 @@
import random
from copy import deepcopy
from enum import Enum
from functools import lru_cache, partial
from math import sqrt
from typing import List, Iterable

from PIL import Image
from funcy import chunks, mapcat, repeatedly
from loguru import logger

from cv_analysis.utils.geometric import is_square_like
from cv_analysis.utils.image_operations import superimpose

from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import area
from synthesis.random import rnd, possibly
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.segment.plot import pick_colormap
from synthesis.segment.random_content_rectangle import RandomContentRectangle
from synthesis.segment.segments import generate_random_plot, generate_recursive_random_table, generate_text_block
from synthesis.segment.table.cell import Cell
from synthesis.text.text import generate_random_words, generate_random_number


class RecursiveRandomTable(RandomContentRectangle):
    def __init__(self, x1, y1, x2, y2, border_width=1, layout: str = None, double_rule=False):
        """A table with a random number of rows and columns, and random content in each cell.

        Args:
            x1: x-coordinate of the top-left corner
            y1: y-coordinate of the top-left corner
            x2: x-coordinate of the bottom-right corner
            y2: y-coordinate of the bottom-right corner
            border_width: width of the table border
            layout: layout of the table, either "horizontal", "vertical", "closed", or "open"
            double_rule: whether to use double rules as the top and bottom rules
        """

        assert layout in [None, "horizontal", "vertical", "closed", "open"]

        super().__init__(x1, y1, x2, y2)

        self.double_rule = double_rule
        self.double_rule_width = (3 * border_width) if self.double_rule else 0

        self.n_columns = rnd.randint(1, max(self.width // 100, 1))
        self.n_rows = rnd.randint(1, max((self.height - 2 * self.double_rule_width) // rnd.randint(17, 100), 1))
        self.cell_size = (self.width / self.n_columns, (self.height - 2 * self.double_rule_width) / self.n_rows)

        self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))

        self.background_color = get_random_background_color()

        self.layout = layout or self.pick_random_layout()
        logger.debug(f"Layout: {self.layout}")

    def pick_random_layout(self):
        if self.n_columns == 1 and self.n_rows == 1:
            layout = "closed"
        elif self.n_columns == 1:
            layout = rnd.choice(["vertical", "closed"])
        elif self.n_rows == 1:
            layout = rnd.choice(["horizontal", "closed"])
        else:
            layout = rnd.choice(["closed", "horizontal", "vertical", "open"])

        return layout

    def generate_random_table(self):
        cells = self.generate_table()
        cells = list(self.fill_cells_with_content(cells))
        # FIXME: There is a bug here: Table rule is not drawn correctly, actually we want to do cells = ...
        list(self.draw_cell_borders(cells))

        self.content = paste_contents(self.content, cells)
        assert self.content.mode == "RGBA"

    def fill_cells_with_content(self, cells):
        yield from map(self.build_cell, cells)

    def build_cell(self, cell):
        if self.__is_a_small_cell(cell):
            cell = self.build_small_cell(cell)

        elif self.__is_a_medium_sized_cell(cell):
            cell = self.build_medium_sized_cell(cell)

        elif self.__is_a_large_cell(cell):
            cell = self.build_large_cell(cell)

        else:
            raise ValueError(f"Invalid cell size: {get_size(cell)}")

        assert cell.content.mode == "RGBA"

        return cell

    def __is_a_small_cell(self, cell):
        return get_size(cell) <= Size.SMALL.value

    def __is_a_medium_sized_cell(self, cell):
        return get_size(cell) <= Size.MEDIUM.value

    def __is_a_large_cell(self, cell):
        return get_size(cell) > Size.MEDIUM.value

    def build_small_cell(self, cell):
        content = (possibly() and generate_random_words(1, 3)) or (
            generate_random_number()
            + ((possibly() and " " + rnd.choice(["$", "£", "%", "EUR", "USD", "CAD", "ADA"])) or "")
        )

        return generate_text_block(cell, content)

    def build_medium_sized_cell(self, cell):
        choice = rnd.choice(["plot", "recurse"])

        if choice == "plot":
            return generate_random_plot(cell)

        elif choice == "recurse":
            return generate_recursive_random_table(
                cell,
                border_width=1,
                layout=random.choice(["open", "horizontal", "vertical"]),
                double_rule=False,
            )

        else:
            return generate_text_block(cell, f"{choice} {get_size(cell):.0f} {get_size_class(cell).name}")

    def build_large_cell(self, cell):
        choice = rnd.choice(["plot", "recurse"])

        logger.debug(f"Generating {choice} {get_size(cell):.0f} {get_size_class(cell).name}")

        if choice == "plot" and is_square_like(cell):
            return generate_random_plot(cell)

        else:
            logger.debug(f"recurse {get_size(cell):.0f} {get_size_class(cell).name}")
            return generate_recursive_random_table(
                cell,
                border_width=1,
                layout=random.choice(["open", "horizontal", "vertical"]),
                double_rule=False,
            )

    def draw_cell_borders(self, cells: List[ContentRectangle]):
        def draw_edges_based_on_position(cell: Cell, col_idx, row_index):
            # Draw the borders of the cell based on its position in the table
            if col_idx < self.n_columns - 1:
                cell.draw_right_border()

            if row_index < self.n_rows - 1:
                cell.draw_bottom_border()

        columns = chunks(self.n_rows, cells)
        for col_idx, column in enumerate(columns):
            for row_index, cell in enumerate(column):
                # TODO: Refactor
                c = Cell(*cell.coords, self.background_color)
                c.content = cell.content
                draw_edges_based_on_position(c, col_idx, row_index)
                yield cell

        if self.layout == "closed":
            # TODO: Refactor
            c = Cell(*self.coords, self.background_color)
            c.content = self.content
            c.draw()
            yield self

        # TODO: Refactor
        if self.double_rule:
            c1 = Cell(*self.coords)
            c1.draw_top_border(width=1)
            c1.draw_bottom_border(width=1)

            x1, y1, x2, y2 = self.coords
            c2 = Cell(x1, y1 + self.double_rule_width, x2, y2 - self.double_rule_width)
            c2.draw_top_border(width=1)
            c2.draw_bottom_border(width=1)

            c = superimpose(c1.content, c2.content)

            self.content = superimpose(c, self.content)

            yield self

    def generate_table(self) -> Iterable[ContentRectangle]:
        yield from mapcat(self.generate_column, range(self.n_columns))

    def generate_column(self, column_index) -> Iterable[ContentRectangle]:
        logger.trace(f"Generating column {column_index}.")
        generate_cell_for_row_index = partial(self.generate_cell, column_index)
        yield from map(generate_cell_for_row_index, range(self.n_rows))

    def generate_cell(self, column_index, row_index) -> ContentRectangle:
        w, h = self.cell_size
        x1, y1 = (column_index * w), (row_index * h) + self.double_rule_width
        x2, y2 = x1 + w, y1 + h
        logger.trace(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
        return Cell(x1, y1, x2, y2, self.background_color)

    def generate_column_names(self):
        column_names = repeatedly(self.generate_column_name, self.n_columns)
        return column_names

    def generate_column_name(self):
        column_name = generate_random_words(1, 3)
        return column_name


@lru_cache(maxsize=None)
def get_random_background_color():
    return tuple([*get_random_color_complementing_color_map(pick_colormap()), rnd.randint(100, 210)])


def get_random_color_complementing_color_map(colormap):
    def color_complement(r, g, b):
        """Reference: https://stackoverflow.com/a/40234924"""

        def hilo(a, b, c):
            if c < b:
                b, c = c, b
            if b < a:
                a, b = b, a
            if c < b:
                b, c = c, b
            return a + c

        k = hilo(r, g, b)
        return tuple(k - u for u in (r, g, b))

    color = colormap(0.2)[:3]
    color = [int(255 * v) for v in color]
    color = color_complement(*color)
    return color


def paste_contents(page, contents: Iterable[ContentRectangle]):
    page = deepcopy(page)
    for content in contents:
        paste_content(page, content)
    return page


def paste_content(page, content_box: ContentRectangle):
    assert content_box.content.mode == "RGBA"
    page.paste(content_box.content, (content_box.x1, content_box.y1), content_box.content)
    return page


def get_size_class(rectangle: Rectangle):
    size = get_size(rectangle)
    if size < Size.SMALL.value:
        return Size.SMALL
    elif size < Size.LARGE.value:
        return Size.MEDIUM
    else:
        return Size.LARGE


def get_size(rectangle: Rectangle):
    size = sqrt(area(rectangle))
    return size


class Size(Enum):
    # FIXME: this has to scale with the DPI
    SMALL = 120
    MEDIUM = 180
    LARGE = 300
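A hedged end-to-end sketch of the recursive table generator above (not part of the changeset; the 800x600 size and output path are illustrative, and every run differs because row/column counts, layout and cell contents are randomized):

# Illustrative sketch; relies only on the constructor and generate_random_table()
# defined above. Output varies per run by design.
from synthesis.segment.table.table import RecursiveRandomTable

table = RecursiveRandomTable(0, 0, 800, 600, layout="horizontal", double_rule=True)
table.generate_random_table()
table.content.save("random_table.png")  # RGBA image of the rendered table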
62
synthesis/segment/text_block.py
Normal file
@ -0,0 +1,62 @@
from typing import List

from PIL import Image, ImageDraw, ImageFont
from funcy import first

from cv_analysis.utils.image_operations import superimpose
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.content_rectangle import ContentRectangle
from synthesis.text.text_block_generator.paragraph import ParagraphGenerator
from synthesis.text.font import pick_random_mono_space_font_available_on_system


class TextBlock(ContentRectangle):
    def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
        super().__init__(x1, y1, x2, y2)
        self.font = font or ImageFont.load_default()  # pick_random_font_available_on_system(size=font_size)
        self.text_generator = text_generator or ParagraphGenerator()

    def __call__(self, *args, **kwargs):
        pass

    def generate_random_text(self, rectangle: Rectangle, n_sentences=3000):
        lines = self.text_generator(rectangle, n_sentences)
        image = write_lines_to_image(lines, rectangle, self.font)
        return self.__put_content(image)

    def put_text(self, text: str, rectangle: Rectangle):
        text_width, text_height = self.font.getsize(text)

        width_delta = text_width - rectangle.width
        height_delta = text_height - rectangle.height

        image = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))

        if width_delta > 0 or height_delta > 0:
            image = image.resize((int(rectangle.width * 0.9), text_height))

        draw = ImageDraw.Draw(image)
        draw.text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
        return self.__put_content(image)

    def __put_content(self, image: Image.Image):
        self.content = image if not self.content else superimpose(self.content, image)
        assert self.content.mode == "RGBA"
        return self


def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
    def write_line(line, line_number):
        draw.text((0, line_number * text_size), line, font=font, fill=(0, 0, 0, 255))

    font = font or pick_random_mono_space_font_available_on_system()

    image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
    draw = ImageDraw.Draw(image)
    text_size = draw.textsize(first(lines), font=font)[1]

    for line_number, line in enumerate(lines):
        write_line(line, line_number)

    return image
0
synthesis/text/__init__.py
Normal file
106
synthesis/text/font.py
Normal file
@ -0,0 +1,106 @@
import itertools
from functools import lru_cache
from pathlib import Path
from typing import List

from PIL import Image, ImageDraw, ImageFont
from funcy import lmap, complement, keep, first, lzip, omit, project
from loguru import logger

from synthesis.random import rnd


class RandomFontPicker:
    def __init__(self, font_dir=None, return_default_font=False):
        fonts = get_fonts(font_dir)
        fonts_lower = [font.lower() for font in fonts]
        domestic_fonts_mask = lmap(complement(self.looks_foreign), fonts_lower)
        self.fonts = list(itertools.compress(fonts, domestic_fonts_mask))
        self.fonts_lower = list(itertools.compress(fonts_lower, domestic_fonts_mask))

        self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
        self.draw = ImageDraw.Draw(self.test_image)
        self.return_default_font = return_default_font

    def looks_foreign(self, font):
        # This filters out foreign fonts (e.g. 'Noto Serif Malayalam')
        return len(font.split("-")[0]) > 10

    def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont:  # FIXME: Slow!
        if self.return_default_font:
            return ImageFont.load_default()

        includes = [i.lower() for i in includes] if includes else []
        excludes = [i.lower() for i in excludes] if excludes else []

        logger.debug(f"Picking font by includes={includes} and excludes={excludes}.")

        def includes_pattern(font):
            return not includes or any(include in font for include in includes)

        def excludes_pattern(font):
            return not excludes or not any(exclude in font for exclude in excludes)

        self.shuffle_fonts()

        mask = lmap(lambda f: includes_pattern(f) and excludes_pattern(f), self.fonts_lower)
        fonts = itertools.compress(self.fonts, mask)
        fonts = keep(map(self.load_font, fonts))
        # fonts = filter(self.font_is_renderable, fonts)  # FIXME: this does not work

        font = first(fonts)
        logger.info(f"Using font: {font.getname()}")
        return font

    def shuffle_fonts(self):
        l = lzip(self.fonts, self.fonts_lower)
        rnd.shuffle(l)
        self.fonts, self.fonts_lower = lzip(*l)

    def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
        return self.pick_random_font_available_on_system(includes=["mono"], excludes=["oblique"])

    @lru_cache(maxsize=None)
    def load_font(self, font: str):
        logger.trace(f"Loading font: {font}")
        try:
            return ImageFont.truetype(font, size=11)
        except OSError:
            return None

    @lru_cache(maxsize=None)
    def font_is_renderable(self, font):
        text_size = self.draw.textsize("Test String", font=font)
        return text_size[0] > 0 and text_size[1]


def get_fonts(path: Path = None) -> List[str]:
    path = path or Path("/usr/share/fonts")
    fonts = list(path.rglob("*.ttf"))
    fonts = [font.name for font in fonts]
    return fonts


@lru_cache(maxsize=None)
def get_font_picker(**kwargs):
    return RandomFontPicker(**kwargs, return_default_font=True)


@lru_cache(maxsize=None)
def pick_random_mono_space_font_available_on_system(**kwargs):
    font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
    return font_picker.pick_random_mono_space_font_available_on_system()


@lru_cache(maxsize=None)
def pick_random_font_available_on_system(**kwargs):
    kwargs["excludes"] = (
        *kwargs.get(
            "excludes",
        ),
        "Kinnari",
        "KacstOne",
    )
    font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
    return font_picker.pick_random_font_available_on_system(**project(kwargs, ["includes", "excludes"]))
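A small sketch of the font-picking API above (not part of the changeset; the include/exclude keywords are illustrative substring filters on font file names, and note that the module-level helper goes through get_font_picker, which sets return_default_font=True, so it may short-circuit to PIL's built-in bitmap font):

# Illustrative only; behavior depends on the TrueType fonts installed under
# /usr/share/fonts and on the return_default_font flag described above.
from synthesis.text.font import pick_random_font_available_on_system

font = pick_random_font_available_on_system(includes=("serif",), excludes=("italic", "mono"))
print(font.getname())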
0
synthesis/text/line_formatter/__init__.py
Normal file
9
synthesis/text/line_formatter/identity.py
Normal file
@ -0,0 +1,9 @@
from synthesis.text.line_formatter.line_formatter import LineFormatter


class IdentityLineFormatter(LineFormatter):
    def __init__(self):
        pass

    def __call__(self, lines, last_full):
        return lines, last_full
5
synthesis/text/line_formatter/line_formatter.py
Normal file
@ -0,0 +1,5 @@
import abc


class LineFormatter(abc.ABC):
    pass
41
synthesis/text/line_formatter/paragraph.py
Normal file
@ -0,0 +1,41 @@
from funcy import identity, compose, first, juxt, rest, rcompose

from cv_analysis.utils import star, rconj
from synthesis.random import rnd
from synthesis.text.line_formatter.line_formatter import LineFormatter


class ParagraphLineFormatter(LineFormatter):
    def __init__(self, blank_line_percentage=None):
        self.blank_line_percentage = blank_line_percentage or rnd.uniform(0, 0.5)

    def __call__(self, lines, last_full):
        return self.format_lines(lines, last_full)

    def format_lines(self, lines, last_full):
        def truncate_current_line():
            return rnd.random() < self.blank_line_percentage and last_full

        # This is meant to be read from the bottom up.
        current_line_shall_not_be_a_full_line = truncate_current_line()
        line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
        format_current_line = compose(line_formatter, first)
        move_current_line_to_back = star(rconj)
        split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
        split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
            split_first_line_from_lines_and_format_the_former,
            move_current_line_to_back,
        )
        current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
        # Start reading here and move up.
        return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line

    def format_line(self, line, full=True):
        line = self.truncate_line(line) if not full else line
        return line, full

    def truncate_line(self, line: str):
        n_trailing_words = rnd.randint(0, 4)
        line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
        line = line + ".\n" if line else line
        return line
26
synthesis/text/text.py
Normal file
@ -0,0 +1,26 @@
import random

from faker import Faker

from synthesis.random import rnd


def generate_random_words(n_min, n_max):
    words = " ".join(Faker().words(rnd.randint(n_min, n_max)))
    return words


def generate_random_number():
    return str(
        round(
            random.choice(
                [
                    random.randint(-10000, 10000),
                    random.uniform(-100, 100),
                ]
            ),
            random.choice(
                [0, 1, 2, 3],
            ),
        )
    )
0
synthesis/text/text_block_generator/__init__.py
Normal file
22
synthesis/text/text_block_generator/caption.py
Normal file
@ -0,0 +1,22 @@
from funcy import first, rest

from cv_analysis.utils import conj
from synthesis.random import rnd
from synthesis.text.text_block_generator.paragraph import generate_random_text_lines
from synthesis.text.text_block_generator.text_block_generator import TextBlockGenerator
from synthesis.text.line_formatter.identity import IdentityLineFormatter


class CaptionGenerator(TextBlockGenerator):
    def __init__(self, caption_start=None):
        self.line_formatter = IdentityLineFormatter()
        self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        first_line_modified = f"{self.caption_start}.: {first(lines)}"
        lines = conj(first_line_modified, rest(lines))
        return lines
36
synthesis/text/text_block_generator/paragraph.py
Normal file
@ -0,0 +1,36 @@
import textwrap
from typing import List

from faker import Faker
from funcy import identity, iterate, take, last

from cv_analysis.utils import star
from cv_analysis.utils.rectangle import Rectangle
from synthesis.random import rnd
from synthesis.text.text_block_generator.text_block_generator import TextBlockGenerator
from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter


class ParagraphGenerator(TextBlockGenerator):
    def __init__(self):
        self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5))

    def __call__(self, rectangle, n_sentences):
        return self.generate_paragraph(rectangle, n_sentences)

    def generate_paragraph(self, rectangle, n_sentences):
        lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
        return lines


def generate_random_text_lines(rectangle: Rectangle, line_formatter=identity, n_sentences=3000) -> List[str]:
    text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
    unformatted_lines = textwrap.wrap(text, width=rectangle.width, break_long_words=False)
    # each iteration of the line formatter function formats one more line and adds it to the back of the list
    formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True))
    # hence do as many iterations as there are lines in the rectangle
    lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator)
    # and then take the lines from the last iteration of the function
    formatted_lines, _ = last(lines_per_iteration)

    return formatted_lines
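A hedged sketch of the paragraph generator above (not part of the changeset; the 80x40 rectangle is illustrative, and it assumes rectangle.width is interpreted as a character count, which is how textwrap.wrap is used in generate_random_text_lines):

# Illustrative only; Rectangle dimensions are invented for the example.
from cv_analysis.utils.rectangle import Rectangle
from synthesis.text.text_block_generator.paragraph import ParagraphGenerator

# Generate Faker text, wrap it to 80-character lines, and randomly truncate
# some lines to imitate paragraph endings.
lines = ParagraphGenerator()(Rectangle(0, 0, 80, 40), n_sentences=50)
print("\n".join(lines[:5]))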
@ -0,0 +1,5 @@
import abc


class TextBlockGenerator(abc.ABC):
    pass
@ -1,6 +1,11 @@
+import warnings
+
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
 pytest_plugins = [
     "test.fixtures.table_parsing",
     "test.fixtures.figure_detection",
+    "test.fixtures.page_generation.page",
 ]
8
test/data/paper/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
/crumpled_paper.jpg
/digital_paper.jpg
/gray_paper.jpg
/rough_grain_paper.jpg
/crumpled.jpg
/digital.jpg
/plain.jpg
/rough_grain.jpg
4
test/data/paper/crumpled.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: d38ebef85a0689bfd047edc98e4a5f93
  size: 14131338
  path: crumpled.jpg
4
test/data/paper/digital.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: 8c4c96efe26731e14dd4a307dad718fd
  size: 108546
  path: digital.jpg
4
test/data/paper/plain.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: 33741812aaff0e54849c5128ae2dccf4
  size: 6924421
  path: plain.jpg
4
test/data/paper/rough_grain.jpg.dvc
Normal file
@ -0,0 +1,4 @@
outs:
- md5: eb62925241917d55db05e07851f3f6b9
  size: 1679152
  path: rough_grain.jpg
0
test/fixtures/page_generation/__init__.py
vendored
Normal file
266
test/fixtures/page_generation/page.py
vendored
Normal file
@ -0,0 +1,266 @@
import sys
from typing import Tuple, Iterable, List

import blend_modes
import numpy as np
import pytest
from PIL import Image, ImageEnhance
from PIL.Image import Transpose
from funcy import (
    juxt,
    compose,
    identity,
)
from loguru import logger

from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose
from cv_analysis.utils.rectangle import Rectangle
from synthesis.content_generator import ContentGenerator
from synthesis.partitioner.two_column import TwoColumnPagePartitioner
from synthesis.random import rnd
from synthesis.segment.table.table import paste_contents

logger.remove()
logger.add(sys.stderr, level="INFO")


@pytest.fixture(
    params=[
        # "rough_grain",
        # "plain",
        # "digital",
        "crumpled",
    ]
)
def base_texture(request, size):
    texture = Image.open(TEST_PAGE_TEXTURES_DIR / (request.param + ".jpg"))
    texture = texture.resize(size)
    return texture


@pytest.fixture(
    params=[
        # "portrait",
        "landscape",
    ]
)
def orientation(request):
    return request.param


@pytest.fixture(
    params=[
        # 30,
        100,
    ]
)
def dpi(request):
    return request.param


@pytest.fixture(
    params=[
        # "brown",
        "sepia",
        # "gray",
        # "white",
        # "light_red",
        # "light_blue",
    ]
)
def color_name(request):
    return request.param


@pytest.fixture(
    params=[
        # "smooth",
        # "coarse",
        "neutral",
    ]
)
def texture_name(request):
    return request.param


@pytest.fixture(
    params=[
        # 30,
        70,
        # 150,
    ]
)
def color_intensity(request):
    return request.param


def random_flip(image):
    if rnd.choice([True, False]):
        image = image.transpose(Transpose.FLIP_LEFT_RIGHT)
    if rnd.choice([True, False]):
        image = image.transpose(Transpose.FLIP_TOP_BOTTOM)
    return image


@pytest.fixture
def color(color_name):
    return {
        "brown": "#7d6c5b",
        "sepia": "#b8af88",
        "gray": "#9c9c9c",
        "white": "#ffffff",
        "light_red": "#d68c8b",
        "light_blue": "#8bd6d6",
    }[color_name]


@pytest.fixture
def texture_fn(texture_name, size):
    if texture_name == "smooth":
        fn = blur
    elif texture_name == "coarse":
        fn = compose(overlay, juxt(blur, sharpen))
    else:
        fn = identity

    return normalize_image_function(fn)


def normalize_image_function(func):
    def inner(image):
        image = normalize_image_format_to_array(image)
        image = func(image)
        image = normalize_image_format_to_pil(image)
        return image

    return inner


@pytest.fixture
def texture(tinted_blank_page, base_texture):
    texture = superimpose(base_texture, tinted_blank_page)
    return texture


@pytest.fixture
def tinted_blank_page(size, color, color_intensity):
    tinted_page = Image.new("RGBA", size, color)
    tinted_page.putalpha(color_intensity)
    return tinted_page


@pytest.fixture
def blank_page(size, color, color_intensity):
    page = Image.new("RGBA", size, color=(255, 255, 255, 0))
    return page


@pytest.fixture
def size(dpi, orientation):
    if orientation == "portrait":
        size = (8.5 * dpi, 11 * dpi)
    elif orientation == "landscape":
        size = (11 * dpi, 8.5 * dpi)
    else:
        raise ValueError(f"Unknown orientation: {orientation}")
    size = tuple(map(int, size))
    return size


@pytest.fixture(
    params=[
        TwoColumnPagePartitioner,
        # RandomPagePartitioner
    ]
)
def page_partitioner(request):
    return request.param()


@pytest.fixture
def boxes(page_partitioner, blank_page):
    boxes = page_partitioner(blank_page)
    return boxes


@pytest.fixture
def prepared_texture(texture, texture_fn):
    texture = random_flip(texture)
    texture = texture_fn(texture)
    return texture


@pytest.fixture
def content_boxes(boxes):
    content_generator = ContentGenerator()
    content_boxes = content_generator(boxes)
    return content_boxes


@pytest.fixture
def page_with_opaque_content(
    blank_page, tinted_blank_page, prepared_texture, content_boxes
) -> Tuple[np.ndarray, Iterable[Rectangle]]:
    """Creates a page with content"""
    page = paste_contents(prepared_texture, content_boxes)

    return page, content_boxes


@pytest.fixture
def page_with_translucent_content(
    blank_page, tinted_blank_page, prepared_texture, content_boxes
) -> Tuple[np.ndarray, List[Rectangle]]:
    """Creates a page with content"""
    page_content = paste_contents(blank_page, content_boxes)
    page = blend_by_multiply(page_content, prepared_texture)

    return page, content_boxes


def blend_by_multiply(page_content, texture):
    def to_array(image: Image) -> np.ndarray:
        return np.array(image).astype(np.float32)

    texture.putalpha(255)
    page_content.putalpha(255)
    factor = 1.2
    enhancer = ImageEnhance.Contrast(texture)
    texture = enhancer.enhance(factor)

    page = blend_modes.multiply(
        *map(
            to_array,
            (
                page_content,
                texture,
            ),
        ),
        opacity=1,
    ).astype(np.uint8)
    return page


@pytest.fixture(scope="function")
def random_seeding():
    from synthesis.segment.plot import pick_colormap

    seed = str(rnd.randint(0, 2**32 - 1))
    logger.info(f"Random seed: {seed}")
    rnd.seed(seed)
    pick_colormap.cache_clear()


@pytest.fixture
def page_with_content(
    random_seeding,
    page_with_translucent_content,
    # page_with_opaque_content,
) -> np.ndarray:

    page, boxes = page_with_translucent_content
    # page, boxes = page_with_opaque_content

    return page, boxes
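The fixtures above chain together (size and color feed blank_page and texture, the partitioner yields boxes, the content generator fills them, and blend_by_multiply composes the final page). A hypothetical extra test, shown only as a sketch of how that chain might be exercised; it mirrors test_blank_page in test/page_generation_test.py below and its assertions are assumptions, not part of the changeset:

# Hypothetical sketch; blend_by_multiply above returns a uint8 NumPy array,
# so the channel-count check is a loose assumption rather than a spec.
def test_page_is_uint8_and_boxes_are_nonempty(page_with_content):
    page, boxes = page_with_content
    assert page.dtype.name == "uint8"
    assert page.shape[-1] in (3, 4)
    assert all(box.width > 0 and box.height > 0 for box in boxes)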
2
test/fixtures/server.py
vendored
@ -6,7 +6,7 @@ import cv2
 import pytest
 from funcy import first

-from cv_analysis.utils.structures import Rectangle
+from cv_analysis.utils.rectangle import Rectangle


 @pytest.fixture
6
test/fixtures/table_parsing.py
vendored
@ -9,8 +9,8 @@ from loguru import logger

 from cv_analysis.config import get_config
 from cv_analysis.locations import REPO_ROOT_PATH, TEST_DATA_DVC
-from cv_analysis.utils.draw import draw_rectangles
-from cv_analysis.utils.open_pdf import open_pdf
+from cv_analysis.utils.drawing import draw_rectangles
+from cv_analysis.utils.input import open_analysis_input_file
 from test.fixtures.figure_detection import paste_text

 CV_CONFIG = get_config()
@ -19,7 +19,7 @@ CV_CONFIG = get_config()
 @pytest.fixture
 def client_page_with_table(test_file_index, dvc_test_data):
     img_path = join(CV_CONFIG.test_data_dir, f"test{test_file_index}.png")
-    return first(open_pdf(img_path))
+    return first(open_analysis_input_file(img_path))


 @pytest.fixture(scope="session")
19
test/page_generation_test.py
Normal file
@ -0,0 +1,19 @@
from typing import Iterable

from PIL.Image import Image

from cv_analysis.utils.display import show_image
from cv_analysis.utils.rectangle import Rectangle


def test_blank_page(page_with_content):
    page, boxes = page_with_content

    draw_boxes(page, boxes)


def draw_boxes(page: Image, boxes: Iterable[Rectangle]):
    from cv_analysis.utils.drawing import draw_rectangles

    page = draw_rectangles(page, boxes, filled=False, annotate=True)
    show_image(page, backend="pil")
@ -3,6 +3,7 @@ from math import prod
 import cv2
 import pytest

+from cv_analysis.utils.spacial import area
 from test.utils.utils import powerset


@ -15,21 +16,20 @@ class TestFindPrimaryTextRegions:

     @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
     def test_page_without_text_yields_figures(self, figure_detection_pipeline, page_with_images, image_size):
-        results = figure_detection_pipeline(page_with_images)
-        result_figures_size = map(lambda x: (x.w, x.h), results)
+        result_rectangles = figure_detection_pipeline(page_with_images)
+        result_figure_sizes = map(lambda r: (r.width, r.height), result_rectangles)

-        assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figures_size])
+        assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figure_sizes])

     @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
     @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
     @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
     @pytest.mark.parametrize("error_tolerance", [0.025])
     def test_page_with_only_text_yields_no_figures(self, figure_detection_pipeline, page_with_text, error_tolerance):
-        results = figure_detection_pipeline(page_with_text)
-
-        result_figures_area = sum(map(lambda x: (x.w * x.h), results))
+        result_rectangles = figure_detection_pipeline(page_with_text)
+        result_figure_areas = sum(map(area, result_rectangles))
         page_area = prod(page_with_text.shape)
-        error = result_figures_area / page_area
+        error = result_figure_areas / page_area

         assert error <= error_tolerance

@ -45,11 +45,11 @@ class TestFindPrimaryTextRegions:
         image_size,
         error_tolerance,
     ):
-        results = list(figure_detection_pipeline(page_with_images_and_text))
+        result_rectangles = list(figure_detection_pipeline(page_with_images_and_text))

-        result_figures_area = sum(map(lambda x: (x.w * x.h), results))
+        result_figure_areas = sum(map(area, result_rectangles))
         expected_figure_area = prod(image_size)

-        error = abs(result_figures_area - expected_figure_area) / expected_figure_area
+        error = abs(result_figure_areas - expected_figure_area) / expected_figure_area

         assert error <= error_tolerance
0
test/unit_tests/layout_parsing_test.py
Normal file
@ -3,12 +3,11 @@ import numpy as np
 import pytest

 from cv_analysis.server.pipeline import table_parsing_formatter, figure_detection_formatter, make_analysis_pipeline
-from cv_analysis.utils.structures import Rectangle
+from cv_analysis.utils.rectangle import Rectangle


 def analysis_fn_mock(image: np.ndarray):
-    bbox = (0, 0, 42, 42)
-    return [Rectangle.from_xyxy(bbox)]
+    return [Rectangle(0, 0, 42, 42)]


 @pytest.fixture
@ -2,9 +2,12 @@ from itertools import starmap

 import cv2
 import pytest
+from funcy import lmap, compose, zipdict

 from cv_analysis.table_parsing import parse_tables
-from cv_analysis.utils.test_metrics import compute_document_score
+from cv_analysis.utils import lift
+from cv_analysis.utils.rectangle import Rectangle
+from cv_analysis.utils.metrics import compute_document_score


 @pytest.mark.parametrize("score_threshold", [0.95])
@ -12,8 +15,9 @@ from cv_analysis.utils.test_metrics import compute_document_score
 def test_table_parsing_on_client_pages(
     score_threshold, client_page_with_table, expected_table_annotation, test_file_index
 ):
-    result = [x.json_xywh() for x in parse_tables(client_page_with_table)]
-    formatted_result = {"pages": [{"page": str(test_file_index), "cells": result}]}
+    results = compose(lift(rectangle_to_dict), parse_tables)(client_page_with_table)
+    formatted_result = {"pages": [{"cells": results}]}

     score = compute_document_score(formatted_result, expected_table_annotation)

@ -25,6 +29,14 @@ def error_tolerance(line_thickness):
     return line_thickness * 7


+def rectangle_to_dict(rectangle: Rectangle):
+    return zipdict(["x", "y", "width", "height"], rectangle_to_xywh(rectangle))
+
+
+def rectangle_to_xywh(rectangle: Rectangle):
+    return rectangle.x1, rectangle.y1, abs(rectangle.x1 - rectangle.x2), abs(rectangle.y1 - rectangle.y2)
+
+
 @pytest.mark.parametrize("line_thickness", [1, 2, 3])
 @pytest.mark.parametrize("line_type", [cv2.LINE_4, cv2.LINE_AA, cv2.LINE_8])
 @pytest.mark.parametrize("table_style", ["closed horizontal vertical", "open horizontal vertical"])
@ -32,7 +44,7 @@ def error_tolerance(line_thickness):
 @pytest.mark.parametrize("background_color", [255, 220])
 @pytest.mark.parametrize("table_shape", [(5, 8)])
 def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with_table, error_tolerance):
-    result = [x.xywh() for x in parse_tables(page_with_table)]
+    result = lmap(rectangle_to_xywh, parse_tables(page_with_table))
     assert (
         result == expected_gold_page_with_table
         or average_error(result, expected_gold_page_with_table) <= error_tolerance
@ -46,8 +58,8 @@ def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with
 @pytest.mark.parametrize("background_color", [255, 220])
 @pytest.mark.parametrize("table_shape", [(5, 8)])
 @pytest.mark.xfail
-def test_bad_qual_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
-    result = [x.xywh() for x in parse_tables(page_with_patchy_table)]
+def test_low_quality_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
+    result = lmap(rectangle_to_xywh, parse_tables(page_with_patchy_table))
     assert (
         result == expected_gold_page_with_table
         or average_error(result, expected_gold_page_with_table) <= error_tolerance