Compare commits

master ... refactorin (156 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 363d04ce5d |  |
|  | 510b39b537 |  |
|  | 223d3e6ed0 |  |
|  | 9efa37ae87 |  |
|  | f9019d6625 |  |
|  | 66c65ce900 |  |
|  | 0e7791394f |  |
|  | 0f6e87b8a6 |  |
|  | f12ef4b8ed |  |
|  | 7360226e98 |  |
|  | 43688d0f0b |  |
|  | effc69c42f |  |
|  | 0be5849df1 |  |
|  | 6a7cff5bf5 |  |
|  | fc0f19c5f1 |  |
|  | fdbc49ccba |  |
|  | 61371153f6 |  |
|  | 86bd96db67 |  |
|  | 46146cc886 |  |
|  | be0c643f75 |  |
|  | 4ec7cb8d7b |  |
|  | dcdfe03f43 |  |
|  | 77c86078eb |  |
|  | e952d19c68 |  |
|  | 2bcac91dea |  |
|  | 7facedb38a |  |
|  | 3113d5cb5d |  |
|  | ba901473fe |  |
|  | e8b4467265 |  |
|  | 4c65d906b8 |  |
|  | 667b4a4858 |  |
|  | 83e6dc3ce7 |  |
|  | fb69eb7f5c |  |
|  | f98256d7e9 |  |
|  | cbb3a8cc61 |  |
|  | 9f9face8f0 |  |
|  | f2af040c5b |  |
|  | 6dbe3b6fc9 |  |
|  | a3fece8096 |  |
|  | 26180373a0 |  |
|  | 186b4530f0 |  |
|  | a1ccda4ea9 |  |
|  | 25d35e2349 |  |
|  | daea7d2bf7 |  |
|  | d5e501a05d |  |
|  | d9d363834a |  |
|  | 5dc13e7137 |  |
|  | 826cd3b6a9 |  |
|  | 4f788af35b |  |
|  | 10ea584143 |  |
|  | 7676a8148e |  |
|  | cee5e69a4b |  |
|  | e715c86f8d |  |
|  | c5ba489931 |  |
|  | 3772ca021a |  |
|  | c4eeb956ca |  |
|  | d823ebf7c6 |  |
|  | 71ffb28381 |  |
|  | 9dfbe9a142 |  |
|  | 0eb57056ba |  |
|  | 70802d6341 |  |
|  | 52776494cb |  |
|  | 7d8842b4ac |  |
|  | 9e77e25afb |  |
|  | b3480491be |  |
|  | 3d0c2396ee |  |
|  | f8c2d691b2 |  |
|  | ced1cd9559 |  |
|  | 738c51a337 |  |
|  | 48f6aebc13 |  |
|  | 73d546367c |  |
|  | cfe4b58e38 |  |
|  | 839a264816 |  |
|  | fd57fe99b7 |  |
|  | 5e51fd1d10 |  |
|  | 9c7c5e315f |  |
|  | 3da613af94 |  |
|  | 30e6350881 |  |
|  | 384f0e5f28 |  |
|  | 4d181448b6 |  |
|  | a5cd3d6ec9 |  |
|  | 893622a73e |  |
|  | 4d11a157e5 |  |
|  | 4c10d521e2 |  |
|  | 0f6cbec1d5 |  |
|  | 54484d9ad0 |  |
|  | ca190721d6 |  |
|  | 5611314ff3 |  |
|  | 4ecfe16df5 |  |
|  | 38c0614396 |  |
|  | 64565f9cb0 |  |
|  | 232c6bed4b |  |
|  | 8d34873d1c |  |
|  | 78a951a319 |  |
|  | 8d57d2043d |  |
|  | 41fdda4955 |  |
|  | 4dfdd579a2 |  |
|  | e831ab1382 |  |
|  | 6fead2d9b9 |  |
|  | 1012988475 |  |
|  | 5bc1550eae |  |
|  | 29741fc5da |  |
|  | 4772e3037c |  |
|  | dd6ab94aa2 |  |
|  | eaca8725de |  |
|  | 4af202f098 |  |
|  | 1199845cdf |  |
|  | 4578413748 |  |
|  | d5d67cb064 |  |
|  | d8542762e6 |  |
|  | caef416077 |  |
|  | a8708ffc56 |  |
|  | 3f0bbf0fc7 |  |
|  | 2fec39eda6 |  |
|  | 16cc0007ed |  |
|  | 3d83489819 |  |
|  | 3134021596 |  |
|  | 3cb857d830 |  |
|  | 194102939e |  |
|  | 5d1d9516b5 |  |
|  | 77f85e9de1 |  |
|  | c00081b2bc |  |
|  | 619f67f1fd |  |
|  | a97f8def7c |  |
|  | 65e9735bd9 |  |
|  | 689be75478 |  |
|  | acf46a7a48 |  |
|  | 0f11441b20 |  |
|  | fa1fa15cc8 |  |
|  | 17c40c996a |  |
|  | 99af2943b5 |  |
|  | 0e6cb495e8 |  |
|  | 012e705e70 |  |
|  | 8327794685 |  |
|  | 72bc52dc7b |  |
|  | 557d091a54 |  |
|  | b540cfd0f2 |  |
|  | 8824c5c3ea |  |
|  | 94e9210faf |  |
|  | 06d6863cc5 |  |
|  | dfd87cb4b0 |  |
|  | cd5457840b |  |
|  | eee2f0e256 |  |
|  | 9d2f166fbf |  |
|  | 97fb4b645d |  |
|  | 00e53fb54d |  |
|  | 4be91de036 |  |
|  | 8c6b940364 |  |
|  | cdb12baccd |  |
|  | ac84494613 |  |
|  | 77f565c652 |  |
|  | 47e657aaa3 |  |
|  | b592497b75 |  |
|  | c0d961bc39 |  |
|  | 8260ae58f9 |  |
|  | 068f75d35b |  |
@@ -1,17 +1,17 @@
from functools import partial

import cv2
import numpy as np
+from funcy import lmap

from cv_analysis.figure_detection.figures import detect_large_coherent_structures
from cv_analysis.figure_detection.text import remove_primary_text_regions
+from cv_analysis.utils.conversion import contour_to_rectangle
from cv_analysis.utils.filters import (
    is_large_enough,
    has_acceptable_format,
-    is_not_too_large,
+    is_small_enough,
)
from cv_analysis.utils.postprocessing import remove_included
-from cv_analysis.utils.structures import Rectangle


def detect_figures(image: np.array):
@@ -21,19 +21,18 @@ def detect_figures(image: np.array):
    figure_filter = partial(is_likely_figure, min_area, max_area, max_width_to_height_ratio)

    image = remove_primary_text_regions(image)
-    cnts = detect_large_coherent_structures(image)
-    cnts = filter(figure_filter, cnts)
+    contours = detect_large_coherent_structures(image)
+    contours = filter(figure_filter, contours)

-    rects = map(cv2.boundingRect, cnts)
-    rects = map(Rectangle.from_xywh, rects)
-    rects = remove_included(rects)
+    rectangles = lmap(contour_to_rectangle, contours)
+    rectangles = remove_included(rectangles)

-    return rects
+    return rectangles


-def is_likely_figure(min_area, max_area, max_width_to_height_ratio, cnts):
+def is_likely_figure(min_area, max_area, max_width_to_height_ratio, contours):
    return (
-        is_not_too_large(cnts, max_area)
-        and is_large_enough(cnts, min_area)
-        and has_acceptable_format(cnts, max_width_to_height_ratio)
+        is_small_enough(contours, max_area)
+        and is_large_enough(contours, min_area)
+        and has_acceptable_format(contours, max_width_to_height_ratio)
    )
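The refactor has detect_figures return Rectangle objects (via contour_to_rectangle and remove_included) instead of raw (x, y, w, h) boxes. A minimal usage sketch, assuming a scanned page image on disk; the file name is hypothetical and the thresholding is handled inside the function chain:

```python
import cv2

from cv_analysis.figure_detection.figure_detection import detect_figures

page = cv2.imread("page.png")   # hypothetical scanned page
figures = detect_figures(page)  # Rectangle objects, nested boxes already removed

for figure in figures:
    print(figure)               # e.g. Rectangle(120, 340, 880, 910)
```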
@@ -1,25 +1,33 @@
import cv2
import numpy as np

+from cv_analysis.utils.common import find_contours_and_hierarchies


def detect_large_coherent_structures(image: np.array):
-    """Detects large coherent structures on an image.
+    """Detects large coherent structures in an image.
    Expects an image with binary color space (e.g. threshold applied).

    Args:
        image (np.array): Image to look for large coherent structures in.

    Returns:
-        contours
+        list: List of contours.

    References:
        https://stackoverflow.com/questions/60259169/how-to-group-nearby-contours-in-opencv-python-zebra-crossing-detection
    """
    assert len(image.shape) == 2

+    # FIXME: Parameterize via factory
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_OPEN, (5, 5))
+    # FIXME: Parameterize via factory
    dilate = cv2.dilate(image, dilate_kernel, iterations=4)

+    # FIXME: Parameterize via factory
    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
-    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)
+    # FIXME: Parameterize via factory
+    close = cv2.morphologyEx(dilate, cv2.MORPH_CLOSE, close_kernel, iterations=1)  # TODO: Tweak iterations

-    cnts, _ = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours, _ = find_contours_and_hierarchies(close)

-    return cnts
+    return contours
@@ -1,5 +1,7 @@
import cv2

+from cv_analysis.utils.common import normalize_to_gray_scale


def remove_primary_text_regions(image):
    """Removes regions of primary text, meaning no figure descriptions for example, but main text body paragraphs.
@@ -35,6 +37,7 @@ def remove_primary_text_regions(image):

def apply_threshold_to_image(image):
    """Converts an image to black and white."""
+    image = normalize_to_gray_scale(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
    return cv2.threshold(image, 253, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
@@ -1,87 +1,80 @@
-import itertools
-from itertools import compress
-from itertools import starmap
-from operator import __and__
+from functools import partial
+from typing import Iterable, List

import cv2
import numpy as np
+from funcy import compose, rcompose, lkeep

-from cv_analysis.utils.connect_rects import connect_related_rects2
-from cv_analysis.utils.structures import Rectangle
-from cv_analysis.utils.postprocessing import (
-    remove_overlapping,
-    remove_included,
-    has_no_parent,
+from cv_analysis.utils import lstarkeep
+from cv_analysis.utils.common import (
+    find_contours_and_hierarchies,
+    dilate_page_components,
+    normalize_to_gray_scale,
+    threshold_image,
+    invert_image,
+    fill_rectangles,
)
-from cv_analysis.utils.visual_logging import vizlogger

-#could be dynamic parameter is the scan is noisy
-def is_likely_segment(rect, min_area=100):
-    return cv2.contourArea(rect, False) > min_area
+from cv_analysis.utils.conversion import contour_to_rectangle
+from cv_analysis.utils.merging import merge_related_rectangles
+from cv_analysis.utils.postprocessing import remove_included, has_no_parent
+from cv_analysis.utils.rectangle import Rectangle


-def find_segments(image):
-    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    mask1 = map(is_likely_segment, contours)
-    mask2 = map(has_no_parent, hierarchies[0])
-    mask = starmap(__and__, zip(mask1, mask2))
-    contours = compress(contours, mask)
+def parse_layout(image: np.array) -> List[Rectangle]:
+    """Parse the layout of a page.

-    rectangles = (cv2.boundingRect(c) for c in contours)
+    Args:
+        image: Image of the page.

+    Returns:
+        List of rectangles representing the layout of the page as identified page elements.
+    """
+    rectangles = rcompose(
+        find_segments,
+        remove_included,
+        merge_related_rectangles,
+        remove_included,
+    )(image)

    return rectangles


-def dilate_page_components(image):
-    #if text is detected in words make kernel bigger
-    image = cv2.GaussianBlur(image, (7, 7), 0)
-    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    return cv2.dilate(thresh, kernel, iterations=4)
+def find_segments(image: np.ndarray) -> List[Rectangle]:
+    """Find segments in a page. Segments are structural elements of a page, such as text blocks, tables, etc."""
+    rectangles = rcompose(
+        prepare_for_initial_detection,
+        __find_segments,
+        partial(prepare_for_meta_detection, image.copy()),
+        __find_segments,
+    )(image)

+    return rectangles


-def fill_in_component_area(image, rect):
-    x, y, w, h = rect
-    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
-    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
-    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
-    return ~image
+def prepare_for_initial_detection(image: np.ndarray) -> np.ndarray:
+    return compose(dilate_page_components, normalize_to_gray_scale)(image)


+def __find_segments(image: np.ndarray) -> List[Rectangle]:
+    def to_rectangle_if_valid(contour, hierarchy):
+        return contour_to_rectangle(contour) if is_likely_segment(contour) and has_no_parent(hierarchy) else None

-def parse_layout(image: np.array):
-    image = image.copy()
-    image_ = image.copy()
+    rectangles = lstarkeep(to_rectangle_if_valid, zip(*find_contours_and_hierarchies(image)))

-    if len(image_.shape) > 2:
-        image_ = cv2.cvtColor(image_, cv2.COLOR_BGR2GRAY)
+    return rectangles

-    dilate = dilate_page_components(image_)
-    # show_mpl(dilate)

-    rects = list(find_segments(dilate))
+def prepare_for_meta_detection(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
+    image = rcompose(
+        fill_rectangles,
+        threshold_image,
+        invert_image,
+        normalize_to_gray_scale,
+    )(image, rectangles)

-    # -> Run meta detection on the previous detections TODO: refactor
-    for rect in rects:
-        x, y, w, h = rect
-        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 0), -1)
-        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 7)
-    # show_mpl(image)
-    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
-    image = ~image
-    # show_mpl(image)
-    if len(image.shape) > 2:
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    return image

-    rects = find_segments(image)
-    # <- End of meta detection
-    rects = list(map(Rectangle.from_xywh, rects))
-    rects = remove_included(rects)

-    rects = map(lambda r: r.xywh(), rects)
-    rects = connect_related_rects2(rects)
-    rects = list(map(Rectangle.from_xywh, rects))
-    rects = remove_included(rects)

-    return rects
+def is_likely_segment(rectangle: Rectangle, min_area: float = 100) -> bool:
+    # FIXME: Parameterize via factory
+    return cv2.contourArea(rectangle, False) > min_area
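parse_layout is now a single rcompose pipeline over find_segments, remove_included and merge_related_rectangles. A short sketch of calling it, with a hypothetical input path:

```python
import cv2

from cv_analysis.layout_parsing import parse_layout

image = cv2.imread("scanned_page.png")  # hypothetical path
layout = parse_layout(image)            # List[Rectangle], one per detected page element

for element in layout:
    print(element.coords)               # [x1, y1, x2, y2]
```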
@@ -5,5 +5,10 @@ from pathlib import Path
MODULE_PATH = Path(__file__).resolve().parents[0]
PACKAGE_ROOT_PATH = MODULE_PATH.parents[0]
REPO_ROOT_PATH = PACKAGE_ROOT_PATH

TEST_DIR_PATH = REPO_ROOT_PATH / "test"
-TEST_DATA_DVC = TEST_DIR_PATH / "test_data.dvc"
TEST_DATA_DIR = TEST_DIR_PATH / "data"
+TEST_DATA_DIR_DVC = TEST_DIR_PATH / "data.dvc"
+TEST_DATA_SYNTHESIS_DIR = TEST_DATA_DIR / "synthesis"
+TEST_PAGE_TEXTURES_DIR = TEST_DATA_SYNTHESIS_DIR / "paper"
+TEST_SMILES_FILE = TEST_DATA_SYNTHESIS_DIR / "smiles.csv"
cv_analysis/logging.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import sys
from functools import wraps
from operator import attrgetter
from typing import Callable, Any

import loguru
from funcy import log_calls, log_enters, log_exits


logger = loguru.logger
logger.remove()

debug_logger = loguru.logger
debug_logger.add(
    sink=sys.stderr,
    format="<blue>{time:YYYY-MM-DD at HH:mm:ss}</blue> | <level>{level: <8}</level> | <cyan>{name}</cyan>: <level>{message}</level>",
    level="DEBUG",
)

dev_logger = loguru.logger
dev_logger.add(
    sink=sys.stderr,
    format="<green>{time:YYYY-MM-DD at HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>: <level>{message}</level>",
    level="DEBUG",
)

prod_logger = loguru.logger
prod_logger.add(
    sink=sys.stderr,
    format="<white>{time:YYYY-MM-DD at HH:mm:ss}</white> | <level>{level: <8}</level> | <cyan>{name}</cyan>: <level>{message}</level>",
    level="INFO",
    enqueue=True,
)

# logger.remove()
# logger.add(sink=sys.stderr, level="DEBUG", enqueue=True)


def __log(logger, level: str, enters=True, exits=True) -> Callable:
    print_func = get_print_func(logger, level)

    def dec():
        if enters and exits:
            fn = log_calls
        elif enters:
            fn = log_enters
        elif exits:
            fn = log_exits
        else:
            raise ValueError("Must log either enters or exits")

        return fn(print_func=print_func)

    def inner(func: Callable) -> Callable:
        @dec()
        @wraps(func)
        def inner(*args, **kwargs) -> Any:
            return func(*args, **kwargs)

        return inner

    return inner


def get_print_func(logger, level: str):
    return attrgetter(level.lower())(logger)


def debug_log(level: str = "TRACE", enters=True, exits=True) -> Callable:
    return __log(debug_logger, level, enters=enters, exits=exits)


def dev_log(level: str = "TRACE", enters=True, exits=True) -> Callable:
    return __log(dev_logger, level, enters=enters, exits=exits)


def prod_log(level: str = "TRACE", enters=True, exits=True) -> Callable:
    return __log(prod_logger, level, enters=enters, exits=exits)


def delay(fn, *args, **kwargs):
    def inner():
        return fn(*args, **kwargs)

    return inner
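The new logging module preconfigures loguru sinks and exposes debug_log, dev_log and prod_log decorator factories built on funcy's log_calls, log_enters and log_exits. A minimal sketch of instrumenting a function; the decorated function is made up, and an explicit level is passed because the default TRACE level would be filtered by the DEBUG sink:

```python
from cv_analysis.logging import debug_log


@debug_log(level="DEBUG")  # wraps the function with funcy.log_calls using debug_logger.debug
def add(a, b):  # hypothetical function, used only for illustration
    return a + b


add(1, 2)  # entry and exit are reported on stderr through the DEBUG sink
```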
@@ -5,7 +5,7 @@ import numpy as np
from iteration_utilities import starfilter, first

from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.visual_logger import vizlogger


def is_likely_redaction(contour, hierarchy, min_area):
@@ -5,34 +5,29 @@ from funcy import lmap, flatten

from cv_analysis.figure_detection.figure_detection import detect_figures
from cv_analysis.table_parsing import parse_tables
-from cv_analysis.utils.structures import Rectangle
+from cv_analysis.utils.rectangle import Rectangle
from pdf2img.conversion import convert_pages_to_images
from pdf2img.default_objects.image import ImagePlus, ImageInfo
from pdf2img.default_objects.rectangle import RectanglePlus


-def get_analysis_pipeline(operation, table_parsing_skip_pages_without_images):
-    if operation == "table":
-        return make_analysis_pipeline(
-            parse_tables,
-            table_parsing_formatter,
-            dpi=200,
-            skip_pages_without_images=table_parsing_skip_pages_without_images,
-        )
-    elif operation == "figure":
-        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200)
+def make_analysis_pipeline_for_element_type(segment_type, **kwargs):
+    if segment_type == "table":
+        return make_analysis_pipeline(parse_tables, table_parsing_formatter, dpi=200, **kwargs)
+    elif segment_type == "figure":
+        return make_analysis_pipeline(detect_figures, figure_detection_formatter, dpi=200, **kwargs)
    else:
-        raise
+        raise ValueError(f"Unknown segment type {segment_type}.")


def make_analysis_pipeline(analysis_fn, formatter, dpi, skip_pages_without_images=False):
-    def analyse_pipeline(pdf: bytes, index=None):
+    def analysis_pipeline(pdf: bytes, index=None):
        def parse_page(page: ImagePlus):
            image = page.asarray()
-            rects = analysis_fn(image)
-            if not rects:
+            rectangles = analysis_fn(image)
+            if not rectangles:
                return
-            infos = formatter(rects, page, dpi)
+            infos = formatter(rectangles, page, dpi)
            return infos

        pages = convert_pages_to_images(pdf, index=index, dpi=dpi, skip_pages_without_images=skip_pages_without_images)
@@ -40,22 +35,26 @@ def make_analysis_pipeline(analysis_fn, formatter, dpi, skip_pages_without_image

        yield from flatten(filter(truth, results))

-    return analyse_pipeline
+    return analysis_pipeline


-def table_parsing_formatter(rects, page: ImagePlus, dpi):
-    def format_rect(rect: Rectangle):
-        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
-        return rect_plus.asdict(derotate=True)
+def table_parsing_formatter(rectangles, page: ImagePlus, dpi):
+    def format_rectangle(rectangle: Rectangle):
+        rectangle_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
+        return rectangle_plus.asdict(derotate=True)

-    bboxes = lmap(format_rect, rects)
+    bboxes = lmap(format_rectangle, rectangles)

    return {"pageInfo": page.asdict(natural_index=True), "tableCells": bboxes}


-def figure_detection_formatter(rects, page, dpi):
-    def format_rect(rect: Rectangle):
-        rect_plus = RectanglePlus.from_pixels(*rect.xyxy(), page.info, alpha=False, dpi=dpi)
+def figure_detection_formatter(rectangles, page, dpi):
+    def format_rectangle(rectangle: Rectangle):
+        rect_plus = RectanglePlus.from_pixels(*rectangle_to_xyxy(rectangle), page.info, alpha=False, dpi=dpi)
        return asdict(ImageInfo(page.info, rect_plus.asbbox(derotate=False), rect_plus.alpha))

-    return lmap(format_rect, rects)
+    return lmap(format_rectangle, rectangles)


+def rectangle_to_xyxy(rectangle: Rectangle):
+    return rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y2
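make_analysis_pipeline_for_element_type replaces get_analysis_pipeline and forwards keyword arguments to make_analysis_pipeline. A usage sketch; the module path of this pipeline file is not shown in the diff, so the import below is an assumption, and the PDF path is hypothetical:

```python
# Import path is an assumption; the diff does not show this module's file name.
from cv_analysis.pipeline import make_analysis_pipeline_for_element_type

run_tables = make_analysis_pipeline_for_element_type("table", skip_pages_without_images=True)

with open("report.pdf", "rb") as f:  # hypothetical input document
    for page_result in run_tables(f.read()):
        print(page_result["pageInfo"], len(page_result["tableCells"]))
```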
@@ -1,15 +1,11 @@
from functools import partial
from itertools import chain, starmap
from operator import attrgetter

import cv2
import numpy as np
from funcy import lmap, lfilter

from cv_analysis.layout_parsing import parse_layout
-from cv_analysis.utils.postprocessing import remove_isolated  # xywh_to_vecs, xywh_to_vec_rect, adjacent1d
-from cv_analysis.utils.structures import Rectangle
-from cv_analysis.utils.visual_logging import vizlogger
+from cv_analysis.utils.conversion import box_to_rectangle
+from cv_analysis.utils.postprocessing import remove_isolated
+from cv_analysis.utils.visual_logger import vizlogger


def add_external_contours(image, image_h_w_lines_only):
@@ -31,8 +27,7 @@ def apply_motion_blur(image: np.array, angle, size=80):
        size (int): kernel size; 80 found empirically to work well

    Returns:
-        np.array
+        np.ndarray
    """
    k = np.zeros((size, size), dtype=np.float32)
    vizlogger.debug(k, "tables08_blur_kernel1.png")
@@ -55,10 +50,9 @@ def isolate_vertical_and_horizontal_components(img_bin):

    Args:
        img_bin (np.array): array corresponding to single binarized page image
-        bounding_rects (list): list of layout boxes of the form (x, y, w, h), potentially containing tables

    Returns:
-        np.array
+        np.ndarray
    """
    line_min_width = 48
    kernel_h = np.ones((1, line_min_width), np.uint8)
@@ -90,10 +84,9 @@ def find_table_layout_boxes(image: np.array):
    def is_large_enough(box):
        (x, y, w, h) = box
        if w * h >= 100000:
-            return Rectangle.from_xywh(box)
+            return box_to_rectangle(box)

    layout_boxes = parse_layout(image)
-    a = lmap(is_large_enough, layout_boxes)
+    return lmap(is_large_enough, layout_boxes)
@@ -103,7 +96,7 @@ def preprocess(image: np.array):
    return ~image


-def turn_connected_components_into_rects(image: np.array):
+def turn_connected_components_into_rectangles(image: np.array):
    def is_large_enough(stat):
        x1, y1, w, h, area = stat
        return area > 2000 and w > 35 and h > 25
@@ -117,7 +110,7 @@ def turn_connected_components_into_rects(image: np.array):
    return []


-def parse_tables(image: np.array, show=False):
+def parse_tables(image: np.array):
    """Runs the full table parsing process.

    Args:
@@ -129,11 +122,8 @@ def parse_tables(image: np.array, show=False):

    image = preprocess(image)
    image = isolate_vertical_and_horizontal_components(image)
-    rects = turn_connected_components_into_rects(image)
-    #print(rects, "\n\n")
-    rects = list(map(Rectangle.from_xywh, rects))
-    #print(rects, "\n\n")
-    rects = remove_isolated(rects)
-    #print(rects, "\n\n")
-
-    return rects
+    boxes = turn_connected_components_into_rectangles(image)
+    rectangles = lmap(box_to_rectangle, boxes)
+    rectangles = remove_isolated(rectangles)
+
+    return rectangles
cv_analysis/utils/common.py (new file, 51 lines)
@@ -0,0 +1,51 @@
from functools import reduce
from typing import Iterable

import cv2
import numpy as np
from funcy import first

from cv_analysis.utils.rectangle import Rectangle


def find_contours_and_hierarchies(image):
    contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours, first(hierarchies) if hierarchies is not None else None


def dilate_page_components(image: np.ndarray) -> np.ndarray:
    # FIXME: Parameterize via factory
    image = cv2.GaussianBlur(image, (7, 7), 0)
    # FIXME: Parameterize via factory
    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # FIXME: Parameterize via factory
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    # FIXME: Parameterize via factory
    dilate = cv2.dilate(thresh, kernel, iterations=4)
    return dilate


def normalize_to_gray_scale(image: np.ndarray) -> np.ndarray:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) > 2 else image
    return image


def threshold_image(image: np.ndarray) -> np.ndarray:
    # FIXME: Parameterize via factory
    _, image = cv2.threshold(image, 254, 255, cv2.THRESH_BINARY)
    return image


def invert_image(image: np.ndarray):
    return ~image


def fill_rectangles(image: np.ndarray, rectangles: Iterable[Rectangle]) -> np.ndarray:
    image = reduce(fill_in_component_area, rectangles, image)
    return image


def fill_in_component_area(image: np.ndarray, rectangle: Rectangle) -> np.ndarray:
    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (0, 0, 0), -1)
    cv2.rectangle(image, (rectangle.x1, rectangle.y1), (rectangle.x2, rectangle.y2), (255, 255, 255), 7)
    return image
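common.py gathers the shared OpenCV helpers (contour lookup, dilation, thresholding, inversion, rectangle filling) that layout parsing and figure detection now compose. A small sketch chaining them roughly the way prepare_for_meta_detection does, with made-up inputs:

```python
import numpy as np

from cv_analysis.utils.common import (
    fill_rectangles,
    invert_image,
    normalize_to_gray_scale,
    threshold_image,
)
from cv_analysis.utils.rectangle import Rectangle

page = np.full((200, 200, 3), 255, dtype=np.uint8)       # stand-in for a page image
detections = [Rectangle(10, 10, 80, 60)]                  # hypothetical earlier detections

masked = fill_rectangles(page, detections)                # black fill plus thick white border
binary = threshold_image(masked)                          # fixed 254/255 binary threshold
prepared = normalize_to_gray_scale(invert_image(binary))  # ready for a second detection pass
```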
@@ -1,120 +0,0 @@
from itertools import combinations, starmap, product
from typing import Iterable


def is_near_enough(rect_pair, max_gap=14):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]

    return any([abs(x1 - (x2 + w2)) <= max_gap,
                abs(x2 - (x1 + w1)) <= max_gap,
                abs(y2 - (y1 + h1)) <= max_gap,
                abs(y1 - (y2 + h2)) <= max_gap])


def is_overlapping(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    dx = min(x1 + w1, x2 + w2) - max(x1, x2)
    dy = min(y1 + h1, y2 + h2) - max(y1, y2)
    return True if (dx >= 0) and (dy >= 0) else False


def is_on_same_line(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    return any([any([abs(y1 - y2) <= 10,
                     abs(y1 + h1 - (y2 + h2)) <= 10]),
                any([y2 <= y1 and y1 + h1 <= y2 + h2,
                     y1 <= y2 and y2 + h2 <= y1 + h1])])


def has_correct_position1(rect_pair):
    x1, y1, w1, h1 = rect_pair[0]
    x2, y2, w2, h2 = rect_pair[1]
    return any([any([abs(x1 - x2) <= 10,
                     abs(y1 - y2) <= 10,
                     abs(x1 + w1 - (x2 + w2)) <= 10,
                     abs(y1 + h1 - (y2 + h2)) <= 10]),
                any([y2 <= y1 and y1 + h1 <= y2 + h2,
                     y1 <= y2 and y2 + h2 <= y1 + h1,
                     x2 <= x1 and x1 + w1 <= x2 + w2,
                     x1 <= x2 and x2 + w2 <= x1 + w1])])


def is_related(rect_pair):
    return (is_near_enough(rect_pair) and has_correct_position1(rect_pair)) or is_overlapping(
        rect_pair)


def fuse_rects(rect1, rect2):
    if rect1 == rect2:
        return rect1
    x1, y1, w1, h1 = rect1
    x2, y2, w2, h2 = rect2

    topleft = list(min(product([x1, x2], [y1, y2])))
    bottomright = list(max(product([x1 + w1, x2 + w2], [y1 + h1, y2 + h2])))

    w = [bottomright[0] - topleft[0]]
    h = [bottomright[1] - topleft[1]]
    return tuple(topleft + w + h)


def rects_not_the_same(r):
    return r[0] != r[1]


def find_related_rects(rects):
    rect_pairs = list(filter(is_related, combinations(rects, 2)))
    rect_pairs = list(filter(rects_not_the_same, rect_pairs))
    if not rect_pairs:
        return [], rects
    rel_rects = list(set([rect for pair in rect_pairs for rect in pair]))
    unrel_rects = [rect for rect in rects if rect not in rel_rects]
    return rect_pairs, unrel_rects


def connect_related_rects(rects):
    rects_to_connect, rects_new = find_related_rects(rects)

    while len(rects_to_connect) > 0:
        rects_fused = list(starmap(fuse_rects, rects_to_connect))
        rects_fused = list(dict.fromkeys(rects_fused))

        if len(rects_fused) == 1:
            rects_new += rects_fused
            rects_fused = []

        rects_to_connect, connected_rects = find_related_rects(rects_fused)
        rects_new += connected_rects

        if len(rects_to_connect) > 1 and len(set(rects_to_connect)) == 1:
            rects_new.append(rects_fused[0])
            rects_to_connect = []

    return rects_new


def connect_related_rects2(rects: Iterable[tuple]):
    rects = list(rects)
    current_idx = 0

    while True:
        if current_idx + 1 >= len(rects) or len(rects) <= 1:
            break
        merge_happened = False
        current_rect = rects.pop(current_idx)
        for idx, maybe_related_rect in enumerate(rects):
            if is_related((current_rect, maybe_related_rect)):
                current_rect = fuse_rects(current_rect, maybe_related_rect)
                rects.pop(idx)
                merge_happened = True
                break
        rects.insert(0, current_rect)
        if not merge_happened:
            current_idx += 1
        elif merge_happened:
            current_idx = 0

    return rects
cv_analysis/utils/conversion.py (new file, 47 lines)
@@ -0,0 +1,47 @@
import json
from typing import Sequence, Union

import cv2
import numpy as np
from PIL import Image

from cv_analysis.utils.rectangle import Rectangle

Image_t = Union[Image.Image, np.ndarray]


def contour_to_rectangle(contour):
    return box_to_rectangle(cv2.boundingRect(contour))


def box_to_rectangle(box: Sequence[int]) -> Rectangle:
    x, y, w, h = box
    return Rectangle(x, y, x + w, y + h)


def rectangle_to_box(rectangle: Rectangle) -> Sequence[int]:
    return [rectangle.x1, rectangle.y1, rectangle.width, rectangle.height]


class RectangleJSONEncoder(json.JSONEncoder):
    def __init__(self, *args, **kwargs):
        json.JSONEncoder.__init__(self, *args, **kwargs)
        self._replacement_map = {}

    def default(self, o):
        if isinstance(o, Rectangle):
            return {"x1": o.x1, "x2": o.x2, "y1": o.y1, "y2": o.y2}
        else:
            return json.JSONEncoder.default(self, o)

    def encode(self, o):
        result = json.JSONEncoder.encode(self, o)
        return result


def normalize_image_format_to_array(image: Image_t):
    return np.array(image).astype(np.uint8) if isinstance(image, Image.Image) else image


def normalize_image_format_to_pil(image: Image_t):
    return Image.fromarray(image.astype(np.uint8)) if isinstance(image, np.ndarray) else image
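conversion.py centralizes the translations between OpenCV (x, y, w, h) boxes, contours and Rectangle objects, plus JSON encoding. A quick sketch based on the functions above:

```python
import json

from cv_analysis.utils.conversion import RectangleJSONEncoder, box_to_rectangle, rectangle_to_box

rectangle = box_to_rectangle((10, 20, 30, 40))   # -> Rectangle(10, 20, 40, 60)
print(rectangle_to_box(rectangle))               # -> [10, 20, 30, 40]
print(json.dumps(rectangle, cls=RectangleJSONEncoder))
# -> {"x1": 10, "x2": 40, "y1": 20, "y2": 60}
```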
@@ -1,33 +1,51 @@
import cv2
import numpy as np
from PIL import Image
-from PIL.Image import Image as Image_t
from matplotlib import pyplot as plt

+from cv_analysis.utils.conversion import normalize_image_format_to_array

-def show_image_cv2(image, maxdim=700):

+def show_image(image, backend="mpl", **kwargs):
+    image = normalize_image_format_to_array(image)
+    if backend == "mpl":
+        show_image_mpl(image, **kwargs)
+    elif backend == "cv2":
+        show_image_cv2(image, **kwargs)
+    elif backend == "pil":
+        Image.fromarray(image).show()
+    else:
+        raise ValueError(f"Unknown backend: {backend}")


+def show_image_cv2(image, maxdim=700, **kwargs):
    h, w, c = image.shape
    maxhw = max(h, w)
    if maxhw > maxdim:
        ratio = maxdim / maxhw
        h = int(h * ratio)
        w = int(w * ratio)
-        img = cv2.resize(image, (h, w))

+    img = cv2.resize(image, (h, w))
    cv2.imshow("", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


-def show_image_mpl(image):
+def show_image_mpl(image, **kwargs):
-    if isinstance(image, Image_t):
-        # noinspection PyTypeChecker
-        image = np.array(image)
-    # noinspection PyArgumentList
-    assert image.max() <= 255
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(20, 20)
+    assert image.dtype == np.uint8
    ax.imshow(image, cmap="gray")
+    ax.title.set_text(kwargs.get("title", ""))
    plt.show()


-def show_image(image, backend="m"):
-    if backend.startswith("m"):
-        show_image_mpl(image)
-    else:
-        show_image_cv2(image)


def save_image(image, path):
    cv2.imwrite(path, image)
@@ -1,19 +1,23 @@
+from typing import Union

import cv2
import numpy as np
+from PIL import Image

from cv_analysis.utils import copy_and_normalize_channels


-def draw_contours(image, contours, color=None, annotate=False):
+def draw_contours(image, contours):

    image = copy_and_normalize_channels(image)

-    for cont in contours:
-        cv2.drawContours(image, cont, -1, (0, 255, 0), 4)
+    for contour in contours:
+        cv2.drawContours(image, contour, -1, (0, 255, 0), 4)

    return image


-def draw_rectangles(image, rectangles, color=None, annotate=False):
+def draw_rectangles(image: Union[np.ndarray, Image.Image], rectangles, color=None, annotate=False, filled=False):
    def annotate_rect(x, y, w, h):
        cv2.putText(
            image,
@@ -21,18 +25,18 @@ def draw_rectangles(image, rectangles, color=None, annotate=False):
            (x + (w // 2) - 12, y + (h // 2) + 9),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
-            (0, 255, 0),
+            (0, 255, 0, 255),
            2,
        )

    image = copy_and_normalize_channels(image)

    if not color:
-        color = (0, 255, 0)
+        color = (0, 255, 0, 255)

    for rect in rectangles:
        x, y, w, h = rect
-        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
+        cv2.rectangle(image, (x, y), (x + w, y + h), color, -1 if filled else 1)
        if annotate:
            annotate_rect(x, y, w, h)
@@ -5,7 +5,7 @@ def is_large_enough(cont, min_area):
    return cv2.contourArea(cont, False) > min_area


-def is_not_too_large(cnt, max_area):
+def is_small_enough(cnt, max_area):
    return cv2.contourArea(cnt, False) < max_area
cv_analysis/utils/geometric.py (new file, 13 lines)
@@ -0,0 +1,13 @@
from cv_analysis.utils.rectangle import Rectangle


def is_square_like(box: Rectangle):
    return box.width / box.height > 0.5 and box.height / box.width > 0.5


def is_wide(box: Rectangle):
    return box.width / box.height > 1.5


def is_tall(box: Rectangle):
    return box.height / box.width > 1.5
cv_analysis/utils/image_operations.py (new file, 115 lines)
@@ -0,0 +1,115 @@
from typing import Tuple

import cv2 as cv
import numpy as np
from PIL import ImageOps, Image
from loguru import logger

from cv_analysis.utils.conversion import normalize_image_format_to_pil

Color = Tuple[int, int, int]


def blur(image: np.ndarray):
    return cv.blur(image, (3, 3))


def sharpen(image: np.ndarray):
    return cv.filter2D(image, -1, np.array([[-1, -1, -1], [-1, 6, -1], [-1, -1, -1]]))


def overlay(images, mode=np.sum):
    assert mode in [np.sum, np.max]
    images = np.stack(list(images))
    image = mode(images, axis=0)
    image = (image / image.max() * 255).astype(np.uint8)
    return image


def tint_image(src, color="#FFFFFF"):
    src.load()
    r, g, b, alpha = src.split()
    gray = ImageOps.grayscale(src)
    result = ImageOps.colorize(gray, (0, 0, 0), color)
    result.putalpha(alpha)
    return result


def color_shift_array(image: np.ndarray, color: Color):
    """Creates a 3-tensor from a 2-tensor by stacking the 2-tensor three times weighted by the color tuple."""
    assert image.ndim == 3
    assert image.shape[-1] == 3
    assert isinstance(color, tuple)
    assert max(color) <= 255
    assert image.max() <= 255

    color = np.array(color)
    weights = color / color.sum() / 10
    assert max(weights) <= 1

    colored = (image * weights).astype(np.uint8)

    assert colored.shape == image.shape

    return colored


def superimpose(
    base_image: Image,
    image_to_superimpose: Image,
    crop_to_content=True,
    pad=True,
) -> Image:
    """Superimposes an image with transparency onto another image.

    Args:
        base_image: The page image.
        image_to_superimpose: The texture image.
        crop_to_content: If True, the texture will be cropped to content (i.e. the bounding box of all non-transparent
            parts of the texture image).
        pad: If True, the texture will be padded to the size of the page.

    Returns:
        Image where the texture is superimposed onto the page.
    """
    base_image = normalize_image_format_to_pil(base_image)
    image_to_superimpose = normalize_image_format_to_pil(image_to_superimpose)

    if crop_to_content:
        image_to_superimpose = image_to_superimpose.crop(image_to_superimpose.getbbox())

    if base_image.size != image_to_superimpose.size:
        logger.trace(f"Size of page and texture do not match: {base_image.size} != {image_to_superimpose.size}")
        if pad:
            logger.trace(f"Padding texture before pasting to fit size {base_image.size}")
            image_to_superimpose = pad_image_to_size(image_to_superimpose, base_image.size)
        else:
            logger.trace(f"Resizing texture before pasting to fit size {base_image.size}")
            image_to_superimpose = image_to_superimpose.resize(base_image.size)

    assert base_image.size == image_to_superimpose.size
    assert image_to_superimpose.mode == "RGBA"

    base_image.paste(image_to_superimpose, (0, 0), image_to_superimpose)
    return base_image


def pad_image_to_size(image: Image, size: Tuple[int, int]) -> Image:
    """Pads an image to a given size."""
    if image.size == size:
        return image

    if image.size[0] > size[0] or image.size[1] > size[1]:
        raise ValueError(f"Image size {image.size} is larger than target size {size}.")

    padded = Image.new(image.mode, size, color=255)

    pasting_coords = compute_pasting_coordinates(image, padded)
    assert image.mode == "RGBA"
    padded.paste(image, pasting_coords)
    return padded


def compute_pasting_coordinates(smaller: Image, larger: Image.Image):
    """Computes the coordinates for centrally pasting a smaller image onto a larger image."""
    return abs(larger.width - smaller.width) // 2, abs(larger.height - smaller.height) // 2
cv_analysis/utils/input.py (new file, 29 lines)
@@ -0,0 +1,29 @@
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_analysis_input_file(path_or_bytes, first_page=1, last_page=None):

    assert first_page > 0, "Page numbers are 1-based."
    assert last_page is None or last_page >= first_page, "last_page must be greater than or equal to first_page."

    last_page = last_page or first_page

    if type(path_or_bytes) == str:
        if path_or_bytes.lower().endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(path_or_bytes)]
        elif path_or_bytes.lower().endswith(".pdf"):
            pages = pdf2image.convert_from_path(path_or_bytes, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes: .png, .jpg, .jpeg, .pdf")
    elif type(path_or_bytes) == bytes:
        pages = pdf2image.convert_from_bytes(path_or_bytes, first_page=first_page, last_page=last_page)
    elif type(path_or_bytes) in {list, ndarray}:
        return path_or_bytes

    pages = [preprocess_page_array(array(p)) for p in pages]

    return pages
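open_analysis_input_file supersedes the old open_pdf helper (its module is deleted further down in this diff) and switches to 1-based page numbers. A usage sketch with hypothetical paths:

```python
from cv_analysis.utils.input import open_analysis_input_file

# PDF page range; page numbers are 1-based now (the old open_pdf was 0-based).
pages = open_analysis_input_file("scan.pdf", first_page=1, last_page=3)
first_page = pages[0]  # numpy array, already run through preprocess_page_array
```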
cv_analysis/utils/merging.py (new file, 54 lines)
@@ -0,0 +1,54 @@
from functools import reduce
from itertools import combinations
from typing import List, Tuple, Set

from funcy import all

from cv_analysis.utils import until, make_merger_sentinel
from cv_analysis.utils.rectangle import Rectangle
from cv_analysis.utils.spacial import related


def merge_related_rectangles(rectangles: List[Rectangle]) -> List[Rectangle]:
    """Merges rectangles that are related to each other, iterating on partial merge results until no more mergers are
    possible."""
    assert isinstance(rectangles, list)
    no_new_merges = make_merger_sentinel()
    return until(no_new_merges, merge_rectangles_once, rectangles)


def merge_rectangles_once(rectangles: List[Rectangle]) -> List[Rectangle]:
    """Merges rectangles that are related to each other, but does not iterate on the results."""
    rectangles = set(rectangles)
    merged, used = reduce(merge_if_related, combinations(rectangles, 2), (set(), set()))

    return list(merged | rectangles - used)


T = Tuple[Set[Rectangle], Set[Rectangle]]
V = Tuple[Rectangle, Rectangle]


def merge_if_related(merged_and_used_so_far: T, rectangle_pair: V) -> T:
    """Merges two rectangles if they are related, otherwise returns the accumulator unchanged."""
    alpha, beta = rectangle_pair
    merged, used = merged_and_used_so_far

    def unused(*args) -> bool:
        return not used & {*args}

    if all(unused, (alpha, beta)) and related(alpha, beta):
        return merged | {bounding_rect(alpha, beta)}, used | {alpha, beta}

    else:
        return merged, used


def bounding_rect(alpha: Rectangle, beta: Rectangle) -> Rectangle:
    """Returns the smallest rectangle that contains both rectangles."""
    return Rectangle(
        min(alpha.x1, beta.x1),
        min(alpha.y1, beta.y1),
        max(alpha.x2, beta.x2),
        max(alpha.y2, beta.y2),
    )
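merge_related_rectangles replaces the iterative connect_related_rects loops removed earlier in this diff with a reduce over rectangle pairs that repeats until a fixed point. A sketch with made-up coordinates; whether two boxes merge depends on utils.spacial.related, which is assumed here to treat overlapping boxes as related:

```python
from cv_analysis.utils.merging import merge_related_rectangles
from cv_analysis.utils.rectangle import Rectangle

boxes = [Rectangle(0, 0, 50, 50), Rectangle(40, 0, 90, 50), Rectangle(200, 200, 220, 220)]
merged = merge_related_rectangles(boxes)
# Expected outcome under that assumption: the two overlapping boxes collapse into
# their bounding rectangle Rectangle(0, 0, 90, 50); the isolated box passes through.
print(merged)
```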
cv_analysis/utils/metrics.py (new file, 56 lines)
@@ -0,0 +1,56 @@
from functools import reduce
from operator import itemgetter
from typing import Iterable

import numpy as np
from funcy import lmap, lpluck, first

from cv_analysis.utils import lift
from cv_analysis.utils.rectangle import Rectangle


def compute_document_score(result_dict, ground_truth_dicts):

    extract_cells = lambda dicts: lpluck("cells", dicts["pages"])

    cells_per_ground_truth_page, cells_per_result_page = map(extract_cells, (ground_truth_dicts, result_dict))
    cells_on_page_to_rectangles = lift(rectangle_from_dict)
    cells_on_pages_to_rectangles = lift(cells_on_page_to_rectangles)

    rectangles_per_ground_truth_page, rectangles_per_result_page = map(
        cells_on_pages_to_rectangles, (cells_per_ground_truth_page, cells_per_result_page)
    )

    scores = lmap(compute_page_iou, rectangles_per_result_page, rectangles_per_ground_truth_page)

    n_cells_per_page = np.array(lmap(len, cells_per_ground_truth_page))
    document_score = np.average(scores, weights=n_cells_per_page / n_cells_per_page.sum())

    return document_score


def rectangle_from_dict(d):
    x1, y1, w, h = itemgetter("x", "y", "width", "height")(d)
    return Rectangle(x1, y1, x1 + w, y1 + h)


def compute_page_iou(predicted_rectangles: Iterable[Rectangle], true_rectangles: Iterable[Rectangle]):
    def find_best_iou(sum_so_far_and_candidate_rectangles, true_rectangle):
        sum_so_far, predicted_rectangles = sum_so_far_and_candidate_rectangles
        best_match, best_iou = find_max_overlap(true_rectangle, predicted_rectangles)
        return sum_so_far + best_iou, predicted_rectangles - {best_match}

    predicted_rectangles = set(predicted_rectangles)
    true_rectangles = set(true_rectangles)

    iou_sum = first(reduce(find_best_iou, true_rectangles, (0, predicted_rectangles)))
    normalizing_factor = 1 / max(len(predicted_rectangles), len(true_rectangles))
    score = normalizing_factor * iou_sum

    return score


def find_max_overlap(rectangle: Rectangle, candidate_rectangles: Iterable[Rectangle]):
    best_candidate_rectangle = max(candidate_rectangles, key=rectangle.iou)
    iou = rectangle.iou(best_candidate_rectangle)
    return best_candidate_rectangle, iou
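compute_page_iou greedily pairs each ground-truth rectangle with its best remaining prediction and normalizes the summed IoU by the larger of the two set sizes. A small worked example, assuming Rectangle.iou implements standard intersection over union:

```python
from cv_analysis.utils.metrics import compute_page_iou
from cv_analysis.utils.rectangle import Rectangle

predicted = [Rectangle(0, 0, 10, 10), Rectangle(20, 20, 30, 30)]
truth = [Rectangle(0, 0, 10, 10), Rectangle(20, 20, 40, 40)]

# First pair matches exactly (IoU 1.0); the second overlaps with
# intersection 100 and union 400, i.e. IoU 0.25.
# Score: (1.0 + 0.25) / max(2, 2) = 0.625.
print(compute_page_iou(predicted, truth))
```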
cv_analysis/utils/morphing.py (new file, 38 lines)
@@ -0,0 +1,38 @@
from typing import Tuple

from PIL import Image
from loguru import logger

from cv_analysis.utils.image_operations import compute_pasting_coordinates
from cv_analysis.utils.rectangle import Rectangle
from synthesis.segment.content_rectangle import ContentRectangle


def shrink_rectangle(rectangle: Rectangle, factor: float) -> Rectangle:
    x1, y1, x2, y2 = compute_scaled_coordinates(rectangle, (1 - factor))

    logger.trace(f"Shrinking {rectangle} by {factor} to ({x1}, {y1}, {x2}, {y2}).")

    assert x1 >= rectangle.x1
    assert y1 >= rectangle.y1
    assert x2 <= rectangle.x2
    assert y2 <= rectangle.y2

    shrunk_rectangle = Rectangle(x1, y1, x2, y2)

    if isinstance(rectangle, ContentRectangle):  # TODO: Refactor
        shrunk_rectangle = ContentRectangle(*shrunk_rectangle.coords, rectangle.content)

    return shrunk_rectangle


def compute_scaled_coordinates(rectangle: Rectangle, factor: float) -> Tuple[int, int, int, int]:
    # FIXME: Refactor: Using image to compute coordinates is not clean
    image = Image.new("RGBA", (rectangle.width, rectangle.height))
    scaled = image.resize((int(rectangle.width * factor), int(rectangle.height * factor)))

    x1, y1 = compute_pasting_coordinates(scaled, image)
    x1 = rectangle.x1 + x1
    y1 = rectangle.y1 + y1
    x2, y2 = x1 + scaled.width, y1 + scaled.height
    return x1, y1, x2, y2
@@ -1,27 +0,0 @@
from numpy import array, ndarray
import pdf2image
from PIL import Image

from cv_analysis.utils.preprocessing import preprocess_page_array


def open_pdf(pdf, first_page=0, last_page=None):

    first_page += 1
    last_page = None if last_page is None else last_page + 1

    if type(pdf) == str:
        if pdf.lower().endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(pdf)]
        elif pdf.lower().endswith(".pdf"):
            pages = pdf2image.convert_from_path(pdf, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes:\n\t.png\n\t.jpg\n\t.jpeg\n\t.pdf")
    elif type(pdf) == bytes:
        pages = pdf2image.convert_from_bytes(pdf, first_page=first_page, last_page=last_page)
    elif type(pdf) in {list, ndarray}:
        return pdf

    pages = [preprocess_page_array(array(p)) for p in pages]

    return pages
@@ -1,15 +1,17 @@
from collections import namedtuple
from functools import partial
from itertools import starmap, compress
-from typing import Iterable, List
-from cv_analysis.utils.structures import Rectangle
+from typing import Iterable, List, Sequence

+from funcy import lremove

+from cv_analysis.utils.rectangle import Rectangle


def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
    def overlap(a: Rectangle, rect2: Rectangle) -> float:
        return a.intersection(rect2) > 0

-    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> list:
+    def does_not_overlap(rect: Rectangle, rectangles: Iterable[Rectangle]) -> bool:
        return not any(overlap(rect, rect2) for rect2 in rectangles if not rect == rect2)

    rectangles = list(filter(partial(does_not_overlap, rectangles=rectangles), rectangles))
@@ -17,15 +19,28 @@ def remove_overlapping(rectangles: Iterable[Rectangle]) -> List[Rectangle]:


def remove_included(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
-    keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
-    return keep
+    rectangles_to_keep = [rect for rect in rectangles if not rect.is_included(rectangles)]
+    return rectangles_to_keep


+def remove_small(boxes: Iterable[Rectangle], page_width, page_height, min_percentage=0.13) -> List[Rectangle]:
+    min_width = page_width * min_percentage
+    min_height = page_height * min_percentage
+
+    def small(box: Rectangle):
+        return box.width < min_width or box.height < min_height
+
+    return lremove(small, boxes)


def __remove_isolated_unsorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]:
    def is_connected(rect: Rectangle, rectangles: Iterable[Rectangle]):
        return any(rect.adjacent(rect2) for rect2 in rectangles if not rect == rect2)

-    rectangles = list(filter(partial(is_connected, rectangles=list(rectangles)), rectangles))
+    if not isinstance(rectangles, list):
+        rectangles = list(rectangles)
+
+    rectangles = list(filter(partial(is_connected, rectangles=rectangles), rectangles))
    return rectangles
@@ -42,9 +57,9 @@ def __remove_isolated_sorted(rectangles: Iterable[Rectangle]) -> List[Rectangle]
    return rectangles


-def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted=True) -> List[Rectangle]:
+def remove_isolated(rectangles: Iterable[Rectangle], input_unsorted: bool = True) -> List[Rectangle]:
    return (__remove_isolated_unsorted if input_unsorted else __remove_isolated_sorted)(rectangles)


-def has_no_parent(hierarchy):
+def has_no_parent(hierarchy: Sequence[int]) -> bool:
    return hierarchy[-1] <= 0
cv_analysis/utils/rectangle.py (new file, 99 lines)
@@ -0,0 +1,99 @@
# See https://stackoverflow.com/a/33533514
from __future__ import annotations

from typing import Iterable, Union

from funcy import identity

from cv_analysis.utils.spacial import adjacent, contains, intersection, iou, area, is_contained, shift

Coord = Union[int, float]


class Rectangle:
    def __init__(self, x1, y1, x2, y2, discrete=True):
        """Creates a rectangle from two points."""
        nearest_valid = int if discrete else identity

        self.__x1 = nearest_valid(x1)
        self.__y1 = nearest_valid(y1)
        self.__x2 = nearest_valid(x2)
        self.__y2 = nearest_valid(y2)

    def __repr__(self):
        return f"Rectangle({self.x1}, {self.y1}, {self.x2}, {self.y2})"

    @property
    def x1(self):
        return self.__x1

    @property
    def x2(self):
        return self.__x2

    @property
    def y1(self):
        return self.__y1

    @property
    def y2(self):
        return self.__y2

    @property
    def width(self):
        return abs(self.x2 - self.x1)

    @property
    def height(self):
        return abs(self.y2 - self.y1)

    @property
    def coords(self):
        return [self.x1, self.y1, self.x2, self.y2]

    @property
    def size(self):
        return self.width, self.height

    def __hash__(self):
        return hash((self.x1, self.y1, self.x2, self.y2))

    def __iter__(self):
        yield self.x1
        yield self.y1
        yield self.width
        yield self.height

    def area(self):
        """Calculates the area of this rectangle."""
        return area(self)

    def intersection(self, other):
        """Calculates the intersection of this and the given other rectangle."""
        return intersection(self, other)

    def iou(self, other: Rectangle):
        """Calculates the intersection over union of this and the given other rectangle."""
        return iou(self, other)

    def includes(self, other: Rectangle, tol=3):
        """Checks if this rectangle contains the given other."""
        return contains(self, other, tol)

    def is_included(self, rectangles: Iterable[Rectangle]):
        """Checks if this rectangle is contained by any of the given rectangles."""
        return is_contained(self, rectangles)

    def adjacent(self, other: Rectangle, tolerance=7):
        """Checks if this rectangle is adjacent to the given other."""
        return adjacent(self, other, tolerance)

    def shift(self, dx, dy):
        """Shifts this rectangle by the given amount."""
        x1, y1, x2, y2 = shift(self, dx, dy)
        self.__x1 = x1
        self.__y1 = y1
        self.__x2 = x2
        self.__y2 = y2

        return self
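Rectangle stores two corner points but iterates as (x1, y1, width, height), which keeps it compatible with code that still unpacks the old box tuples. A short sketch of the API shown above; the printed values for iou and adjacent depend on utils.spacial, which is only partially shown in this diff:

```python
from cv_analysis.utils.rectangle import Rectangle

a = Rectangle(0, 0, 100, 50)
b = Rectangle(103, 0, 200, 50)

print(a.size)         # (100, 50)
print(list(a))        # [0, 0, 100, 50] -> x1, y1, width, height
print(a.iou(b))       # 0 is expected here, since the boxes do not overlap
print(a.adjacent(b))  # True is expected: a 3 px gap within the default 7 px tolerance
```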
cv_analysis/utils/spacial.py (new file, 294 lines)
@@ -0,0 +1,294 @@
# See https://stackoverflow.com/a/39757388
from __future__ import annotations

from functools import lru_cache
from operator import attrgetter
from typing import TYPE_CHECKING, Iterable

from funcy import juxt, rpartial, compose, lflatten, first, second

from cv_analysis.utils import lift

if TYPE_CHECKING:
    from cv_analysis.utils.rectangle import Rectangle


def adjacent(alpha: Rectangle, beta: Rectangle, tolerance=7, strict=False):
    """Checks if the two rectangles are adjacent to each other.

    Args:
        alpha: The first rectangle.
        beta: The second rectangle.
        tolerance: The maximum distance between the two rectangles.
        strict: If True, the rectangles must be adjacent along one axis and contained within the other axis. Else, the
            rectangles must be adjacent along one axis and overlapping the other axis.
    Returns:
        True if the two rectangles are adjacent to each other, False otherwise.
    """
    select_strictness_variant = first if strict else second
    test_candidates = [
        # +---+
        # |   | +---+
        # | a | | b |
        # |   | +___+
        # +___+
        (right_left_aligned_and_vertically_contained, right_left_aligned_and_vertically_overlapping),
        #       +---+
        # +---+ |   |
        # | b | | a |
        # +___+ |   |
        #       +___+
        (left_right_aligned_and_vertically_contained, left_right_aligned_and_vertically_overlapping),
        # +-----------+
        # |     a     |
        # +___________+
        # +-----+
        # |  b  |
        # +_____+
        (bottom_top_aligned_and_horizontally_contained, bottom_top_aligned_and_horizontally_overlapping),
        # +-----+
        # |  b  |
        # +_____+
        # +-----------+
        # |     a     |
        # +___________+
        (top_bottom_aligned_and_horizontally_contained, top_bottom_aligned_and_horizontally_overlapping),
    ]

    tests = map(select_strictness_variant, test_candidates)
    return any(juxt(*tests)(alpha, beta, tolerance))


def right_left_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is left of the other within a tolerance and also overlaps the other's y range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def left_right_aligned_and_vertically_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is right of the other within a tolerance and also overlaps the other's y range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def bottom_top_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is above the other within a tolerance and also overlaps the other's x range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def top_bottom_aligned_and_horizontally_overlapping(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is below the other within a tolerance and also overlaps the other's x range."""
    return adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
        alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def right_left_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is left of the other within a tolerance and also contains the other's y range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.x2, beta.x1, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def left_right_aligned_and_vertically_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is right of the other within a tolerance and also contains the other's y range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.x1, beta.x2, beta.y1, beta.y2, alpha.y1, alpha.y2, tolerance=tol
    )


def bottom_top_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is above the other within a tolerance and also contains the other's x range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.y2, beta.y1, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def top_bottom_aligned_and_horizontally_contained(alpha: Rectangle, beta: Rectangle, tol):
    """Checks if the first rectangle is below the other within a tolerance and also contains the other's x range."""
    return adjacent_along_one_axis_and_contained_within_perpendicular_axis(
        alpha.y1, beta.y2, beta.x1, beta.x2, alpha.x1, alpha.x2, tolerance=tol
    )


def adjacent_along_one_axis_and_overlapping_along_perpendicular_axis(
    axis_0_point_1,
    axis_1_point_2,
    axis_1_contained_point_1,
    axis_1_contained_point_2,
    axis_1_lower_bound,
    axis_1_upper_bound,
    tolerance,
):
    """Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
    axis.
    """
    return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
        axis_0_point_1,
        axis_1_point_2,
        axis_1_contained_point_1,
|
||||
axis_1_contained_point_2,
|
||||
axis_1_lower_bound,
|
||||
axis_1_upper_bound,
|
||||
tolerance,
|
||||
mode="overlapping",
|
||||
)
|
||||
|
||||
|
||||
def adjacent_along_one_axis_and_contained_within_perpendicular_axis(
|
||||
axis_0_point_1,
|
||||
axis_1_point_2,
|
||||
axis_1_contained_point_1,
|
||||
axis_1_contained_point_2,
|
||||
axis_1_lower_bound,
|
||||
axis_1_upper_bound,
|
||||
tolerance,
|
||||
):
|
||||
"""Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
|
||||
axis.
|
||||
"""
|
||||
return adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
|
||||
axis_0_point_1,
|
||||
axis_1_point_2,
|
||||
axis_1_contained_point_1,
|
||||
axis_1_contained_point_2,
|
||||
axis_1_lower_bound,
|
||||
axis_1_upper_bound,
|
||||
tolerance,
|
||||
mode="contained",
|
||||
)
|
||||
|
||||
|
||||
def adjacent_along_one_axis_and_overlapping_or_contained_along_perpendicular_axis(
|
||||
axis_0_point_1,
|
||||
axis_1_point_2,
|
||||
axis_1_contained_point_1,
|
||||
axis_1_contained_point_2,
|
||||
axis_1_lower_bound,
|
||||
axis_1_upper_bound,
|
||||
tolerance,
|
||||
mode,
|
||||
):
|
||||
"""Checks if two points are adjacent along one axis and two other points overlap a range along the perpendicular
|
||||
axis or are contained in that range, depending on the mode specified.
|
||||
"""
|
||||
assert mode in ["overlapping", "contained"]
|
||||
quantifier = any if mode == "overlapping" else all
|
||||
return all(
|
||||
[
|
||||
abs(axis_0_point_1 - axis_1_point_2) <= tolerance,
|
||||
quantifier(
|
||||
[
|
||||
axis_1_lower_bound <= p <= axis_1_upper_bound
|
||||
for p in [axis_1_contained_point_1, axis_1_contained_point_2]
|
||||
]
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def contains(alpha: Rectangle, beta: Rectangle, tol=3):
|
||||
"""Checks if the first rectangle contains the second rectangle."""
|
||||
return (
|
||||
beta.x1 + tol >= alpha.x1
|
||||
and beta.y1 + tol >= alpha.y1
|
||||
and beta.x2 - tol <= alpha.x2
|
||||
and beta.y2 - tol <= alpha.y2
|
||||
)
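# Illustrative sketch (assumes the Rectangle(x1, y1, x2, y2) constructor used elsewhere
# on this branch): a box that pokes past the outer one by no more than `tol` pixels
# still counts as contained:
#
#     outer = Rectangle(0, 0, 100, 100)
#     inner = Rectangle(10, 10, 102, 90)       # overshoots the right edge by 2px
#     assert contains(outer, inner, tol=3)
#     assert not contains(outer, inner, tol=1)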
|
||||
|
||||
|
||||
def is_contained(rectangle: Rectangle, rectangles: Iterable[Rectangle]):
|
||||
"""Checks if the rectangle is contained within any of the other rectangles."""
|
||||
other_rectangles = filter(lambda r: r != rectangle, rectangles)
|
||||
return any(map(rpartial(contains, rectangle), other_rectangles))
|
||||
|
||||
|
||||
def iou(alpha: Rectangle, beta: Rectangle):
|
||||
"""Calculates the intersection area over the union area of two rectangles."""
|
||||
return intersection(alpha, beta) / union(alpha, beta)
|
||||
|
||||
|
||||
def area(rectangle: Rectangle):
|
||||
"""Calculates the area of a rectangle."""
|
||||
return abs((rectangle.x2 - rectangle.x1) * (rectangle.y2 - rectangle.y1))
|
||||
|
||||
|
||||
def union(alpha: Rectangle, beta: Rectangle):
|
||||
"""Calculates the union area of two rectangles."""
|
||||
return area(alpha) + area(beta) - intersection(alpha, beta)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1000)
|
||||
def intersection(alpha, beta):
|
||||
"""Calculates the intersection of two rectangles."""
|
||||
return intersection_along_x_axis(alpha, beta) * intersection_along_y_axis(alpha, beta)
|
||||
|
||||
|
||||
def intersection_along_x_axis(alpha, beta):
|
||||
"""Calculates the intersection along the x-axis."""
|
||||
return intersection_along_axis(alpha, beta, "x")
|
||||
|
||||
|
||||
def intersection_along_y_axis(alpha, beta):
|
||||
"""Calculates the intersection along the y-axis."""
|
||||
return intersection_along_axis(alpha, beta, "y")
|
||||
|
||||
|
||||
def intersection_along_axis(alpha, beta, axis):
    """Calculates the intersection along the given axis.

    Cases:
        a           b
        [-----]     (---)       ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = 0
        b           a
        (---)       [-----]     ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = 0
        a       b
        [--(----]----)          ==> [a1, b1, a2, b2] ==> max(0, (a2 - b1)) = (a2 - b1)
          a     b
        (-[---]----)            ==> [b1, a1, a2, b2] ==> max(0, (a2 - a1)) = (a2 - a1)
          b     a
        [-(---)----]            ==> [a1, b1, b2, a2] ==> max(0, (b2 - b1)) = (b2 - b1)
        b       a
        (----[--)----]          ==> [b1, a1, b2, a2] ==> max(0, (b2 - a1)) = (b2 - a1)
    """
    assert axis in ["x", "y"]

    def get_component_accessor(component):
        """Returns a function that accesses the given component of a rectangle."""
        return attrgetter(f"{axis}{component}")

    def make_access_components_and_sort_fn(component):
        """Returns a function that accesses and sorts the given component of multiple rectangles."""
        assert component in [1, 2]
        return compose(sorted, lift(get_component_accessor(component)))

    sort_first_components, sort_second_components = map(make_access_components_and_sort_fn, [1, 2])

    min_c1, max_c1, min_c2, max_c2 = lflatten(juxt(sort_first_components, sort_second_components)((alpha, beta)))
    intersection = max(0, min_c2 - max_c1)
    return intersection
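# Worked example of the sort-and-clamp trick above (illustrative; assumes the
# Rectangle(x1, y1, x2, y2) constructor used elsewhere on this branch):
#
#     a = Rectangle(0, 0, 10, 10)
#     b = Rectangle(6, 4, 20, 10)
#     intersection_along_axis(a, b, "x")   # sorted x1s [0, 6], sorted x2s [10, 20] -> max(0, 10 - 6) = 4
#     intersection_along_axis(a, b, "y")   # sorted y1s [0, 4], sorted y2s [10, 10] -> max(0, 10 - 4) = 6
#     intersection(a, b)                   # 4 * 6 = 24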
|
||||
|
||||
|
||||
def related(alpha: Rectangle, beta: Rectangle):
|
||||
"""Checks if two rectangles lie close by or overlap."""
|
||||
return close(alpha, beta) or overlap(alpha, beta)
|
||||
|
||||
|
||||
def close(alpha: Rectangle, beta: Rectangle, max_gap=14):
|
||||
"""Checks if two rectangles are close to each other."""
|
||||
# FIXME: Parameterize via factory
|
||||
return adjacent(alpha, beta, tolerance=max_gap, strict=True)
|
||||
|
||||
|
||||
def overlap(alpha: Rectangle, beta: Rectangle):
|
||||
"""Checks if two rectangles overlap."""
|
||||
return intersection(alpha, beta) > 0
|
||||
|
||||
|
||||
def shift(rectangle: Rectangle, dx: int, dy: int):
|
||||
"""Shifts a rectangle by the given amount."""
|
||||
return rectangle.x1 + dx, rectangle.y1 + dy, rectangle.x2 + dx, rectangle.y2 + dy
|
||||
@ -1,131 +0,0 @@
|
||||
from json import dumps
|
||||
|
||||
from typing import Iterable
|
||||
import numpy as np
|
||||
from funcy import identity
|
||||
|
||||
|
||||
class Rectangle:
|
||||
def __init__(self, x1=None, y1=None, w=None, h=None, x2=None, y2=None, indent=4, format="xywh", discrete=True):
|
||||
make_discrete = int if discrete else identity
|
||||
|
||||
try:
|
||||
self.x1 = make_discrete(x1)
|
||||
self.y1 = make_discrete(y1)
|
||||
self.w = make_discrete(w) if w else make_discrete(x2 - x1)
|
||||
self.h = make_discrete(h) if h else make_discrete(y2 - y1)
|
||||
self.x2 = make_discrete(x2) if x2 else self.x1 + self.w
|
||||
self.y2 = make_discrete(y2) if y2 else self.y1 + self.h
|
||||
assert np.isclose(self.x1 + self.w, self.x2)
|
||||
assert np.isclose(self.y1 + self.h, self.y2)
|
||||
self.indent = indent
|
||||
self.format = format
|
||||
except Exception as err:
|
||||
raise Exception("x1, y1, (w|x2), and (h|y2) must be defined.") from err
|
||||
|
||||
def json_xywh(self):
|
||||
return {"x": self.x1, "y": self.y1, "width": self.w, "height": self.h}
|
||||
|
||||
def json_xyxy(self):
|
||||
return {"x1": self.x1, "y1": self.y1, "x2": self.x2, "y2": self.y2}
|
||||
|
||||
def json_full(self):
|
||||
# TODO: can we make all coords x0, y0 based? :)
|
||||
return {
|
||||
"x0": self.x1,
|
||||
"y0": self.y1,
|
||||
"x1": self.x2,
|
||||
"y1": self.y2,
|
||||
"width": self.w,
|
||||
"height": self.h,
|
||||
}
|
||||
|
||||
def json(self):
|
||||
json_func = {"xywh": self.json_xywh, "xyxy": self.json_xyxy}.get(self.format, self.json_full)
|
||||
return json_func()
|
||||
|
||||
def xyxy(self):
|
||||
return self.x1, self.y1, self.x2, self.y2
|
||||
|
||||
def xywh(self):
|
||||
return self.x1, self.y1, self.w, self.h
|
||||
|
||||
def intersection(self, rect):
|
||||
bx1, by1, bx2, by2 = rect.xyxy()
|
||||
if (self.x1 > bx2) or (bx1 > self.x2) or (self.y1 > by2) or (by1 > self.y2):
|
||||
return 0
|
||||
intersection_ = (min(self.x2, bx2) - max(self.x1, bx1)) * (min(self.y2, by2) - max(self.y1, by1))
|
||||
return intersection_
|
||||
|
||||
def area(self):
|
||||
return (self.x2 - self.x1) * (self.y2 - self.y1)
|
||||
|
||||
def iou(self, rect):
|
||||
intersection = self.intersection(rect)
|
||||
if intersection == 0:
|
||||
return 0
|
||||
union = self.area() + rect.area() - intersection
|
||||
return intersection / union
|
||||
|
||||
def includes(self, other: "Rectangle", tol=3):
|
||||
"""does a include b?"""
|
||||
return (
|
||||
other.x1 + tol >= self.x1
|
||||
and other.y1 + tol >= self.y1
|
||||
and other.x2 - tol <= self.x2
|
||||
and other.y2 - tol <= self.y2
|
||||
)
|
||||
|
||||
def is_included(self, rectangles: Iterable["Rectangle"]):
|
||||
return any(rect.includes(self) for rect in rectangles if not rect == self)
|
||||
|
||||
def adjacent(self, rect2: "Rectangle", tolerance=7):
|
||||
# tolerance=1 was set too low; most lines are 2px wide
|
||||
def adjacent2d(sixtuple):
|
||||
g, h, i, j, k, l = sixtuple
|
||||
return (abs(g - h) <= tolerance) and any(k <= p <= l for p in [i, j])
|
||||
|
||||
if rect2 is None:
|
||||
return False
|
||||
return any(
|
||||
map(
|
||||
adjacent2d,
|
||||
[
|
||||
(self.x2, rect2.x1, rect2.y1, rect2.y2, self.y1, self.y2),
|
||||
(self.x1, rect2.x2, rect2.y1, rect2.y2, self.y1, self.y2),
|
||||
(self.y2, rect2.y1, rect2.x1, rect2.x2, self.x1, self.x2),
|
||||
(self.y1, rect2.y2, rect2.x1, rect2.x2, self.x1, self.x2),
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_xyxy(cls, xyxy_tuple, discrete=True):
|
||||
x1, y1, x2, y2 = xyxy_tuple
|
||||
return cls(x1=x1, y1=y1, x2=x2, y2=y2, discrete=discrete)
|
||||
|
||||
@classmethod
|
||||
def from_xywh(cls, xywh_tuple, discrete=True):
|
||||
x, y, w, h = xywh_tuple
|
||||
return cls(x1=x, y1=y, w=w, h=h, discrete=discrete)
|
||||
|
||||
@classmethod
|
||||
def from_dict_xywh(cls, xywh_dict, discrete=True):
|
||||
return cls(x1=xywh_dict["x"], y1=xywh_dict["y"], w=xywh_dict["width"], h=xywh_dict["height"], discrete=discrete)
|
||||
|
||||
def __str__(self):
|
||||
return dumps(self.json(), indent=self.indent)
|
||||
|
||||
def __repr__(self):
|
||||
return str(self.json())
|
||||
|
||||
def __iter__(self):
|
||||
return list(self.json().values()).__iter__()
|
||||
|
||||
def __eq__(self, rect):
|
||||
return all([self.x1 == rect.x1, self.y1 == rect.y1, self.w == rect.w, self.h == rect.h])
|
||||
|
||||
|
||||
class Contour:
|
||||
def __init__(self):
|
||||
pass
|
||||
@ -1,61 +0,0 @@
|
||||
from typing import Iterable
|
||||
import numpy as np
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
|
||||
|
||||
def find_max_overlap(box: Rectangle, box_list: Iterable[Rectangle]):
|
||||
best_candidate = max(box_list, key=lambda x: box.iou(x))
|
||||
iou = box.iou(best_candidate)
|
||||
return best_candidate, iou
|
||||
|
||||
|
||||
def compute_page_iou(results_boxes: Iterable[Rectangle], ground_truth_boxes: Iterable[Rectangle]):
|
||||
results = list(results_boxes)
|
||||
truth = list(ground_truth_boxes)
|
||||
if (not results) or (not truth):
|
||||
return 0
|
||||
iou_sum = 0
|
||||
denominator = max(len(results), len(truth))
|
||||
while results and truth:
|
||||
gt_box = truth.pop()
|
||||
best_match, best_iou = find_max_overlap(gt_box, results)
|
||||
results.remove(best_match)
|
||||
iou_sum += best_iou
|
||||
score = iou_sum / denominator
|
||||
return score
|
||||
|
||||
|
||||
def compute_document_score(results_dict, annotation_dict):
|
||||
|
||||
page_weights = np.array([len(page["cells"]) for page in annotation_dict["pages"]])
|
||||
page_weights = page_weights / sum(page_weights)
|
||||
|
||||
scores = []
|
||||
for i in range(len(annotation_dict["pages"])):
|
||||
scores.append(
|
||||
compute_page_iou(
|
||||
map(Rectangle.from_dict_xywh, results_dict["pages"][i]["cells"]),
|
||||
map(Rectangle.from_dict_xywh, annotation_dict["pages"][i]["cells"]),
|
||||
)
|
||||
)
|
||||
|
||||
doc_score = np.average(np.array(scores), weights=page_weights)
|
||||
|
||||
return doc_score
|
||||
|
||||
|
||||
"""
|
||||
from cv_analysis.utils.test_metrics import *
|
||||
|
||||
r1 = Rectangle.from_dict_xywh({'x': 30, 'y': 40, 'width': 50, 'height': 60})
|
||||
r2 = Rectangle.from_dict_xywh({'x': 40, 'y': 30, 'width': 55, 'height': 65})
|
||||
r3 = Rectangle.from_dict_xywh({'x': 45, 'y': 35, 'width': 45, 'height': 55})
|
||||
r4 = Rectangle.from_dict_xywh({'x': 25, 'y': 45, 'width': 45, 'height': 55})
|
||||
d1 = {"pages": [{"cells": [r1.json_xywh(), r2.json_xywh()]}]}
|
||||
d2 = {"pages": [{"cells": [r3.json_xywh(), r4.json_xywh()]}]}
|
||||
|
||||
compute_iou_from_boxes(r1, r2)
|
||||
find_max_overlap(r1, [r2, r3, r4])
|
||||
compute_page_iou([r1, r2], [r3, r4])
|
||||
compute_document_score(d1, d2)
|
||||
"""
|
||||
@ -1,9 +1,19 @@
|
||||
from numpy import generic
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from funcy import first, iterate, keep, lmap, repeatedly
|
||||
from numpy import generic
|
||||
|
||||
|
||||
def copy_and_normalize_channels(image):
|
||||
|
||||
if isinstance(image, Image.Image):
|
||||
image = np.array(image)
|
||||
|
||||
image = image.copy()
|
||||
try:
|
||||
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
||||
@ -17,3 +27,64 @@ def npconvert(ob):
|
||||
if isinstance(ob, generic):
|
||||
return ob.item()
|
||||
raise TypeError
|
||||
|
||||
|
||||
def lift(fn):
|
||||
def lifted(coll):
|
||||
yield from map(fn, coll)
|
||||
|
||||
return lifted
|
||||
|
||||
|
||||
def star(fn):
|
||||
def starred(args):
|
||||
return fn(*args)
|
||||
|
||||
return starred
|
||||
|
||||
|
||||
def lstarkeep(fn, coll):
|
||||
return list(starkeep(fn, coll))
|
||||
|
||||
|
||||
def starkeep(fn, coll):
|
||||
yield from keep(star(fn), coll)
|
||||
|
||||
|
||||
def until(cond, func, *args, **kwargs):
|
||||
return first(filter(cond, iterate(func, *args, **kwargs)))
|
||||
|
||||
|
||||
def conj(x, xs):
|
||||
return [x, *xs]
|
||||
|
||||
|
||||
def rconj(xs, x):
|
||||
return [*xs, x]
|
||||
|
||||
|
||||
def make_merger_sentinel():
|
||||
def no_new_mergers(records):
|
||||
nonlocal number_of_records_so_far
|
||||
|
||||
number_of_records_now = len(records)
|
||||
|
||||
if number_of_records_now == number_of_records_so_far:
|
||||
return True
|
||||
|
||||
else:
|
||||
number_of_records_so_far = number_of_records_now
|
||||
return False
|
||||
|
||||
number_of_records_so_far = -1
|
||||
|
||||
return no_new_mergers
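# Illustrative sketch of the intended use together with `until` above (merge_once is a
# hypothetical single merging pass over rectangles, not part of this diff):
#
#     stable = make_merger_sentinel()
#     merged = until(stable, merge_once, rectangles)   # stop once a pass no longer changes the record count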
|
||||
|
||||
|
||||
def zipmap(fn, boxes, n=2):
|
||||
rets = lmap(list, zip(*map(fn, boxes)))
|
||||
yield from repeatedly(lambda: [], n) if len(rets) < n else rets
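# Illustrative example (names are assumptions, not from this diff): with an `fn` that
# returns a pair per box, zipmap regroups the results column-wise and falls back to
# `n` empty lists when there is nothing to unzip:
#
#     list(zipmap(lambda b: (b, area(b)), boxes))   # -> [[b1, b2, ...], [area1, area2, ...]]
#     list(zipmap(lambda b: (b, area(b)), []))      # -> [[], []]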
|
||||
|
||||
|
||||
def every_nth(n, iterable):
|
||||
return itertools.islice(iterable, 0, None, n)
|
||||
|
||||
1278
poetry.lock
generated
File diff suppressed because it is too large
@ -31,11 +31,26 @@ PyMuPDF = "^1.19.6"
|
||||
pdf2img = {git = "ssh://git@git.iqser.com:2222/rr/pdf2image.git", branch = "master"}
|
||||
pyinfra = {git = "ssh://git@git.iqser.com:2222/rr/pyinfra.git", branch = "master"}
|
||||
loguru = "^0.6.0"
|
||||
rdkit = "^2022.9.4"
|
||||
|
||||
[tool.poetry.group.build.dependencies]
|
||||
pytest = "^7.0.1"
|
||||
|
||||
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
albumentations = "^1.3.0"
|
||||
faker = "^16.4.0"
|
||||
pandas = "^1.5.2"
|
||||
pytablewriter = "^0.64.2"
|
||||
dataframe-image = "^0.1.5"
|
||||
blend-modes = "^2.1.0"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ipython = "^8.9.0"
|
||||
scalene = "^1.5.19"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
@ -1,50 +1,75 @@
|
||||
"""
|
||||
Usage:
|
||||
python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type table --show
|
||||
python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type redaction --show
|
||||
python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type layout --show
|
||||
python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type figure --show
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
import loguru
|
||||
|
||||
from cv_analysis.figure_detection.figure_detection import detect_figures
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
from cv_analysis.redaction_detection import find_redactions
|
||||
from cv_analysis.table_parsing import parse_tables
|
||||
from cv_analysis.utils.display import show_image
|
||||
from cv_analysis.utils.draw import draw_contours, draw_rectangles
|
||||
from cv_analysis.utils.open_pdf import open_pdf
|
||||
from cv_analysis.utils.visual_logging import vizlogger
|
||||
from cv_analysis.utils.drawing import draw_contours, draw_rectangles
|
||||
from cv_analysis.utils.input import open_analysis_input_file
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Annotate PDF pages with detected elements. Specified pages form a closed interval and are 1-based."
|
||||
)
|
||||
parser.add_argument("pdf_path")
|
||||
parser.add_argument("--page_index", type=int, default=0)
|
||||
parser.add_argument("--type", choices=["table", "redaction", "layout", "figure"], default="table")
|
||||
parser.add_argument("--show", action="store_true", default=False)
|
||||
parser.add_argument(
|
||||
"--first_page",
|
||||
"-f",
|
||||
type=int,
|
||||
default=1,
|
||||
)
|
||||
parser.add_argument(
|
||||
"-last_page",
|
||||
"-l",
|
||||
help="if not specified, defaults to the value of the first page specified",
|
||||
type=int,
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--type",
|
||||
"-t",
|
||||
help="element type to look for and analyze",
|
||||
choices=["table", "redaction", "layout", "figure"],
|
||||
default="table",
|
||||
)
|
||||
parser.add_argument("--page", "-p", type=int, default=1)
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def annotate_page(page_image, analysis_function, drawing_function, name="tmp.png", show=True):
|
||||
result = analysis_function(page_image)
|
||||
page_image = drawing_function(page_image, result)
|
||||
vizlogger.debug(page_image, name)
|
||||
def annotate_page(page_image, analysis_fn, draw_fn):
|
||||
result = analysis_fn(page_image)
|
||||
page_image = draw_fn(page_image, result)
|
||||
show_image(page_image)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
page = open_pdf(args.pdf_path, first_page=args.page_index, last_page=args.page_index)[0]
|
||||
name = f"{args.type}_final_result.png"
|
||||
draw = draw_rectangles
|
||||
if args.type == "table":
|
||||
from cv_analysis.table_parsing import parse_tables as analyze
|
||||
elif args.type == "redaction":
|
||||
from cv_analysis.redaction_detection import find_redactions as analyze
|
||||
def get_analysis_and_draw_fn_for_type(element_type):
|
||||
analysis_fn, draw_fn = {
|
||||
"table": (parse_tables, draw_rectangles),
|
||||
"redaction": (find_redactions, draw_contours),
|
||||
"layout": (parse_layout, draw_rectangles),
|
||||
"figure": (detect_figures, draw_rectangles),
|
||||
}[element_type]
|
||||
|
||||
draw = draw_contours
|
||||
elif args.type == "layout":
|
||||
from cv_analysis.layout_parsing import parse_layout as analyze
|
||||
elif args.type == "figure":
|
||||
from cv_analysis.figure_detection.figure_detection import detect_figures
|
||||
analyze = detect_figures
|
||||
annotate_page(page, analyze, draw, name=name, show=args.show)
|
||||
return analysis_fn, draw_fn
|
||||
|
||||
|
||||
def main(args):
|
||||
loguru.logger.info(f"Annotating {args.type}s in {args.pdf_path}...")
|
||||
|
||||
pages = open_analysis_input_file(args.pdf_path, first_page=args.first_page, last_page=args.last_page)
|
||||
|
||||
for page in pages:
|
||||
analysis_fn, draw_fn = get_analysis_and_draw_fn_for_type(args.type)
|
||||
annotate_page(page, analysis_fn, draw_fn)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main(parse_args())
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
|
||||
@ -10,7 +10,7 @@ from funcy import lmap
|
||||
from cv_analysis.figure_detection.figure_detection import detect_figures
|
||||
from cv_analysis.layout_parsing import parse_layout
|
||||
from cv_analysis.table_parsing import parse_tables
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from cv_analysis.utils.drawing import draw_rectangles
|
||||
from pdf2img.conversion import convert_pages_to_images
|
||||
|
||||
|
||||
|
||||
@ -2,28 +2,27 @@ import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from cv_analysis.server.pipeline import get_analysis_pipeline
|
||||
from loguru import logger
|
||||
|
||||
from cv_analysis.server.pipeline import make_analysis_pipeline_for_element_type
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("pdf")
|
||||
parser.add_argument("--type", "-t", choices=["table", "layout", "figure"], required=True)
|
||||
parser.add_argument("pdf", type=Path)
|
||||
parser.add_argument("--element_type", "-t", choices=["table", "figure"], required=True)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main(args):
|
||||
|
||||
analysis_fn = make_analysis_pipeline_for_element_type(args.element_type)
|
||||
|
||||
logger.info(f"Analysing document for {args.element_type}s...")
|
||||
results = list(analysis_fn(args.pdf.read_bytes()))
|
||||
|
||||
print(json.dumps(results, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
|
||||
analysis_fn = get_analysis_pipeline(args.type)
|
||||
|
||||
with open(args.pdf, "rb") as f:
|
||||
pdf_bytes = f.read()
|
||||
|
||||
results = list(analysis_fn(pdf_bytes))
|
||||
|
||||
folder = Path(args.pdf).parent
|
||||
file_stem = Path(args.pdf).stem
|
||||
|
||||
with open(f"{folder}/{file_stem}_{args.type}.json", "w+") as f:
|
||||
json.dump(results, f, indent=2)
|
||||
main(parse_args())
|
||||
|
||||
@ -4,7 +4,7 @@ import logging
|
||||
from operator import itemgetter
|
||||
|
||||
from cv_analysis.config import get_config
|
||||
from cv_analysis.server.pipeline import get_analysis_pipeline
|
||||
from cv_analysis.server.pipeline import make_analysis_pipeline_for_segment_type
|
||||
from cv_analysis.utils.banner import make_art
|
||||
from pyinfra import config as pyinfra_config
|
||||
from pyinfra.queue.queue_manager import QueueManager
|
||||
@ -31,7 +31,10 @@ def analysis_callback(queue_message: dict):
|
||||
should_publish_result = True
|
||||
|
||||
object_bytes = gzip.decompress(storage.get_object(bucket, object_name))
|
||||
analysis_fn = get_analysis_pipeline(operation, CV_CONFIG.table_parsing_skip_pages_without_images)
|
||||
analysis_fn = make_analysis_pipeline_for_segment_type(
|
||||
operation,
|
||||
skip_pages_without_images=CV_CONFIG.table_parsing_skip_pages_without_images,
|
||||
)
|
||||
|
||||
results = analysis_fn(object_bytes)
|
||||
response = {**queue_message, "data": list(results)}
|
||||
|
||||
17
synthesis/__init__.py
Normal file
@ -0,0 +1,17 @@
|
||||
import argparse
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
# parser.add_argument(...)  # placeholder: add_argument() with no arguments would raise a ValueError
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def main(args):
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(parse_args())
|
||||
87
synthesis/content_generator.py
Normal file
@ -0,0 +1,87 @@
|
||||
import itertools
|
||||
from typing import List, Iterable
|
||||
|
||||
from PIL import Image
|
||||
from funcy import lsplit, lfilter, mapcat
|
||||
|
||||
from cv_analysis.logging import logger
|
||||
from cv_analysis.utils import every_nth, zipmap
|
||||
from cv_analysis.utils.geometric import is_square_like
|
||||
from cv_analysis.utils.merging import merge_related_rectangles
|
||||
from cv_analysis.utils.postprocessing import remove_included, remove_overlapping
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.randomization import rnd
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
from synthesis.segment.recursive_content_rectangle import RecursiveContentRectangle
|
||||
from synthesis.segment.segments import (
|
||||
generate_random_text_block,
|
||||
generate_recursive_random_table_with_caption,
|
||||
generate_random_plot_with_caption,
|
||||
)
|
||||
|
||||
|
||||
class ContentGenerator:
|
||||
def __init__(self):
|
||||
self.constrain_layouts = True
|
||||
|
||||
def __call__(self, boxes: List[Rectangle]) -> Image:
|
||||
rnd.shuffle(boxes)
|
||||
|
||||
figure_boxes, text_boxes = lsplit(is_square_like, boxes)
|
||||
|
||||
if self.constrain_layouts:
|
||||
figure_boxes = merge_related_rectangles(figure_boxes)
|
||||
figure_boxes = lfilter(is_square_like, figure_boxes)
|
||||
text_boxes = merge_related_rectangles(text_boxes)
|
||||
|
||||
boxes = list(
|
||||
itertools.chain(
|
||||
map(generate_random_text_block, every_nth(2, text_boxes)),
|
||||
*zipmap(generate_recursive_random_table_with_caption, every_nth(2, text_boxes[1:])),
|
||||
*zipmap(generate_recursive_random_table_with_caption, every_nth(2, figure_boxes)),
|
||||
*zipmap(generate_random_plot_with_caption, every_nth(2, figure_boxes[1:])),
|
||||
)
|
||||
)
|
||||
|
||||
if self.constrain_layouts:
|
||||
boxes = remove_included(boxes)
|
||||
boxes = remove_overlapping(boxes)
|
||||
|
||||
boxes = list(unpack_boxes(boxes))
|
||||
for b in boxes:
|
||||
logger.trace(f"Generated {b}")
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
def unpack_boxes(boxes: Iterable[ContentRectangle]) -> Iterable[ContentRectangle]:
|
||||
for box in boxes:
|
||||
yield box
|
||||
yield from mapcat(__unpack_box_rec, boxes)
|
||||
|
||||
|
||||
def __unpack_box_rec(box: ContentRectangle) -> Iterable[ContentRectangle]:
|
||||
children = box.accept(BoxChildrenVisitor())
|
||||
|
||||
def is_a_leaf():
|
||||
return not children
|
||||
|
||||
def is_an_internal_node():
|
||||
return children
|
||||
|
||||
if is_an_internal_node():
|
||||
yield from mapcat(__unpack_box_rec, children)
|
||||
|
||||
elif is_a_leaf():
|
||||
yield box
|
||||
|
||||
else:
|
||||
raise ValueError("This should not happen")
|
||||
|
||||
|
||||
class BoxChildrenVisitor:
|
||||
def visit_content_rectangle(self, _box: ContentRectangle):
|
||||
return []
|
||||
|
||||
def visit_recursive_content_rectangle(self, box: RecursiveContentRectangle):
|
||||
return box.children
|
||||
134
synthesis/formula.py
Normal file
@ -0,0 +1,134 @@
|
||||
# Draw molecular structures from smiles. Adapted from https://github.com/neeraj-j/molecules
|
||||
from itertools import islice
|
||||
from typing import List, Iterable
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from PIL.Image import Image
|
||||
from funcy import first, retry, keep
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem
|
||||
from rdkit.Chem import Draw
|
||||
from rdkit.Chem import FunctionalGroups
|
||||
|
||||
from cv_analysis.locations import TEST_SMILES_FILE
|
||||
from cv_analysis.logging import debug_log, logger
|
||||
|
||||
|
||||
class StructuralFormulaImageGenerator:
|
||||
def __init__(self, width=None, height=None):
|
||||
self.width = width
|
||||
self.height = height
|
||||
|
||||
self.templates = collect_templates()
|
||||
self.functional_groups = self.templates.keys()
|
||||
|
||||
@debug_log()
|
||||
def generate_images_from_smiles(self, smiles: List[str], max_images_per_functional_group=1) -> Iterable[Image]:
|
||||
yield from self.generate_images_for_functional_groups(
|
||||
smiles,
|
||||
max_images_per_functional_group=max_images_per_functional_group,
|
||||
)
|
||||
|
||||
@debug_log()
|
||||
def generate_images_for_functional_groups(self, smiles: List[str], max_images_per_functional_group):
|
||||
for functional_group in self.functional_groups:
|
||||
smiles = iter(smiles)
|
||||
g = self.generate_images_for_functional_group(smiles, functional_group)
|
||||
yield from islice(keep(g), max_images_per_functional_group)
|
||||
|
||||
def generate_images_for_functional_group(self, smiles: Iterable[str], functional_group: str):
|
||||
try:
|
||||
yield from self.__generate_images_for_functional_group(smiles, functional_group)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
@debug_log()
|
||||
@retry(100, errors=ValueError)
|
||||
def __generate_images_for_functional_group(self, smiles: Iterable[str], functional_group: str):
|
||||
|
||||
AllChem.Compute2DCoords(self.templates[functional_group])
|
||||
|
||||
for smile in smiles:
|
||||
try:
|
||||
image = self.make_image(smile, functional_group)
|
||||
yield image
|
||||
except ValueError: # SMILE does not match functional group
|
||||
raise
|
||||
|
||||
@debug_log()
|
||||
def make_image(self, smile: str, functional_group: str):
|
||||
mol = Chem.MolFromSmiles(smile)
|
||||
AllChem.GenerateDepictionMatching2DStructure(mol, self.templates[functional_group])
|
||||
|
||||
side_length = np.random.randint(70, 400)
|
||||
width = self.width or side_length
|
||||
height = self.height or side_length
|
||||
|
||||
image: Image = Draw.MolToImage(
|
||||
mol,
|
||||
size=(width, height),
|
||||
kekulize=flip_a_coin(),
|
||||
wedgeBonds=flip_a_coin(),
|
||||
)
|
||||
image.putalpha(255)
|
||||
return image
|
||||
|
||||
|
||||
@debug_log()
|
||||
def flip_a_coin():
|
||||
return bool(np.random.randint(0, 2))
|
||||
|
||||
|
||||
@debug_log()
|
||||
def collect_templates():
|
||||
functional_groups = FunctionalGroups.BuildFuncGroupHierarchy()
|
||||
group_name_2_pattern = dict(stream_label_pattern_pairs(functional_groups))
|
||||
return group_name_2_pattern
|
||||
|
||||
|
||||
@debug_log()
|
||||
def stream_label_pattern_pairs(functional_groups):
|
||||
for functional_group in functional_groups:
|
||||
yield functional_group.label, functional_group.pattern
|
||||
yield from stream_label_pattern_pairs(functional_group.children)
|
||||
|
||||
|
||||
@debug_log()
|
||||
def generate_image_of_structural_formula(smiles_file=None, size=None):
|
||||
"""Generate images of formulas from SMILE encoded formulas.
|
||||
|
||||
Args:
|
||||
smiles_file: CSV file with column "smiles". Each row contains a SMILE encoded formula.
|
||||
size: width, height
|
||||
|
||||
Returns:
|
||||
PIL.Image.Image: Image of a formula.
|
||||
"""
|
||||
logger.info(f"Generating structural formula images from {smiles_file}")
|
||||
image = first(generate_images_of_structural_formulas(smiles_file, size=size))
|
||||
if image:
|
||||
return image
|
||||
else:
|
||||
logger.warning(
|
||||
f"No structural formula images generated from {smiles_file}",
|
||||
filter=lambda m: not m.startswith("Depict error"),
|
||||
)
|
||||
raise ValueError(f"Could not generate structural formula image from {smiles_file}")
|
||||
|
||||
|
||||
@debug_log()
|
||||
def generate_images_of_structural_formulas(smiles_file=None, size=None):
|
||||
"""Generate an image of a formula from SMILE encoded formulas.
|
||||
|
||||
Args:
|
||||
smiles_file: CSV file with column "smiles". Each row contains a SMILE encoded formula.
|
||||
size: width, height
|
||||
|
||||
Yields:
|
||||
PIL.Image.Image: Image of a formula.
|
||||
"""
|
||||
size = size or (None, None)
|
||||
smiles_file = smiles_file or TEST_SMILES_FILE
|
||||
smiles = pd.read_csv(smiles_file).sample(frac=1).drop_duplicates().smiles
|
||||
yield from StructuralFormulaImageGenerator(*size).generate_images_from_smiles(smiles)
|
||||
0
synthesis/partitioner/__init__.py
Normal file
71
synthesis/partitioner/page_partitioner.py
Normal file
@ -0,0 +1,71 @@
|
||||
import abc
|
||||
from typing import List, Tuple
|
||||
|
||||
from PIL import Image
|
||||
from funcy import lflatten
|
||||
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.randomization import rnd
|
||||
|
||||
|
||||
class PagePartitioner(abc.ABC):
|
||||
# TODO: produce boxes for page numbers, headers and footers
|
||||
def __init__(self):
|
||||
self.left_margin_percentage = 0.05
|
||||
self.right_margin_percentage = 0.05
|
||||
self.top_margin_percentage = 0.1
|
||||
self.bottom_margin_percentage = 0.1
|
||||
|
||||
self.recursive_margin_percentage = 0.007
|
||||
self.max_recursion_depth = 3
|
||||
self.initial_recursion_probability = 1
|
||||
self.recursion_probability_decay = 0.1
|
||||
|
||||
def __call__(self, page: Image.Image) -> List[Rectangle]:
|
||||
left_margin = int(page.width * self.left_margin_percentage)
|
||||
right_margin = int(page.width * self.right_margin_percentage)
|
||||
top_margin = int(page.height * self.top_margin_percentage)
|
||||
bottom_margin = int(page.height * self.bottom_margin_percentage)
|
||||
|
||||
box = Rectangle(left_margin, top_margin, page.width - right_margin, page.height - bottom_margin)
|
||||
boxes = lflatten(self.generate_content_boxes(box))
|
||||
return boxes
|
||||
|
||||
@abc.abstractmethod
|
||||
def generate_content_boxes(self, box: Rectangle, depth=0):
|
||||
raise NotImplementedError
|
||||
|
||||
def generate_child_boxes(self, box: Rectangle, axis, split_percentage=0.5) -> Tuple[Rectangle, Rectangle]:
|
||||
assert axis in ["x", "y"]
|
||||
|
||||
edge_anchor_point, edge_length = (box.x1, box.width) if axis == "x" else (box.y1, box.height)
|
||||
split_coordinate = split_percentage * edge_length + edge_anchor_point
|
||||
child_boxes = get_child_boxes(box, split_coordinate, axis, self.recursive_margin_percentage)
|
||||
return child_boxes
|
||||
|
||||
def recurse(self, depth):
|
||||
return rnd.random() <= self.recursion_probability(depth)
|
||||
|
||||
def recursion_probability(self, depth):
|
||||
return self.initial_recursion_probability * (1 - self.recursion_probability_decay) ** depth
|
||||
|
||||
|
||||
def get_child_boxes(box: Rectangle, split_coordinate, axis, margin_percentage) -> Tuple[Rectangle, Rectangle]:
|
||||
assert axis in ["x", "y"]
|
||||
|
||||
def low(point_1d):
|
||||
return point_1d * (1 + margin_percentage)
|
||||
|
||||
def high(point_1d):
|
||||
return point_1d * (1 - margin_percentage)
|
||||
|
||||
if axis == "x":
|
||||
return (
|
||||
Rectangle(low(box.x1), low(box.y1), high(split_coordinate), high(box.y2)),
|
||||
Rectangle(low(split_coordinate), low(box.y1), high(box.x2), high(box.y2)),
|
||||
)
|
||||
else:
|
||||
return (
|
||||
Rectangle(low(box.x1), low(box.y1), high(box.x2), high(split_coordinate)),
|
||||
Rectangle(low(box.x1), low(split_coordinate), high(box.x2), high(box.y2)),
|
||||
)
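# Illustrative sketch (assumes the Rectangle(x1, y1, x2, y2) constructor used above):
# splitting a 1000x1000 box at x=500 with the 0.7% recursive margin produces two
# slightly shrunken halves that do not quite touch:
#
#     left, right = get_child_boxes(Rectangle(0, 0, 1000, 1000), 500, "x", 0.007)
#     # left  ~ (0, 0, 496, 993), right ~ (503, 0, 993, 993)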
|
||||
22
synthesis/partitioner/random.py
Normal file
@ -0,0 +1,22 @@
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.partitioner.page_partitioner import PagePartitioner
|
||||
from synthesis.randomization import rnd
|
||||
|
||||
|
||||
class RandomPagePartitioner(PagePartitioner):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def generate_content_boxes(self, box: Rectangle, depth=0):
|
||||
if depth >= self.max_recursion_depth:
|
||||
yield box
|
||||
else:
|
||||
child_boxes = self.generate_child_boxes(
|
||||
box,
|
||||
axis=rnd.choice(["x", "y"]),
|
||||
split_percentage=rnd.uniform(0.3, 0.7),
|
||||
)
|
||||
if self.recurse(depth):
|
||||
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
|
||||
else:
|
||||
yield child_boxes
|
||||
25
synthesis/partitioner/two_column.py
Normal file
@ -0,0 +1,25 @@
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.partitioner.page_partitioner import PagePartitioner
|
||||
from synthesis.randomization import rnd
|
||||
|
||||
|
||||
class TwoColumnPagePartitioner(PagePartitioner):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.max_recursion_depth = 3
|
||||
|
||||
def generate_content_boxes(self, box: Rectangle, depth=0):
|
||||
if depth >= self.max_recursion_depth:
|
||||
yield box
|
||||
|
||||
else:
|
||||
if depth == 0:
|
||||
axis = "x"
|
||||
split_percentage = 0.5
|
||||
else:
|
||||
axis = "y"
|
||||
split_percentage = rnd.choice([0.3, 0.7])
|
||||
|
||||
child_boxes = self.generate_child_boxes(box, axis=axis, split_percentage=split_percentage)
|
||||
|
||||
yield from (self.generate_content_boxes(b, depth + 1) for b in child_boxes)
|
||||
37
synthesis/randomization.py
Normal file
@ -0,0 +1,37 @@
|
||||
import random
|
||||
from functools import lru_cache
|
||||
|
||||
from loguru import logger
|
||||
|
||||
random_seed = random.randint(0, 2**32 - 1)
|
||||
# random_seed = 2973413116
|
||||
# random_seed = 2212357755
|
||||
|
||||
# random_seed = 2987558464 # light green
|
||||
|
||||
# random_seed = 1173898033 # strange bar plot
|
||||
|
||||
# 2467967671
|
||||
|
||||
# random_seed = 237553299
|
||||
# random_seed = 1021421466
|
||||
|
||||
logger.info(f"Random seed: {random_seed}")
|
||||
rnd = random.Random(random_seed)
|
||||
|
||||
|
||||
def maybe():
|
||||
return rnd.random() > 0.9
|
||||
|
||||
|
||||
def possibly():
|
||||
return rnd.random() > 0.5
|
||||
|
||||
|
||||
def probably():
|
||||
return rnd.random() > 0.4
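# Note: with the thresholds above, maybe() is True roughly 10% of the time,
# possibly() roughly 50%, and probably() roughly 60%.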
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_random_seed():
|
||||
return rnd.randint(0, 2**32 - 1)
|
||||
17
synthesis/segment/__init__.py
Normal file
@ -0,0 +1,17 @@
|
||||
import argparse
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
# parser.add_argument(...)  # placeholder: add_argument() with no arguments would raise a ValueError
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def main(args):
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(parse_args())
|
||||
13
synthesis/segment/content_rectangle.py
Normal file
@ -0,0 +1,13 @@
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
|
||||
|
||||
class ContentRectangle(Rectangle):
|
||||
def __init__(self, x1, y1, x2, y2, content=None):
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
self.content = content
|
||||
|
||||
def __repr__(self):
|
||||
return f"{self.__class__.__name__}({self.x1}, {self.y1}, {self.x2}, {self.y2}, content={self.content})"
|
||||
|
||||
def accept(self, visitor):
|
||||
return visitor.visit_content_rectangle(self)
|
||||
221
synthesis/segment/plot.py
Normal file
@ -0,0 +1,221 @@
|
||||
import io
|
||||
import random
|
||||
from functools import lru_cache, partial
|
||||
|
||||
import loguru
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from matplotlib import pyplot as plt
|
||||
from matplotlib.colors import ListedColormap
|
||||
|
||||
from cv_analysis.utils.geometric import is_square_like, is_wide, is_tall
|
||||
from cv_analysis.utils.image_operations import superimpose
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.formula import generate_image_of_structural_formula
|
||||
from synthesis.randomization import rnd, probably, maybe
|
||||
from synthesis.segment.random_content_rectangle import RandomContentRectangle
|
||||
from synthesis.text.text import generate_random_words
|
||||
|
||||
|
||||
class RandomPlot(RandomContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, seed=None):
|
||||
super().__init__(x1, y1, x2, y2, seed=seed)
|
||||
|
||||
self.cmap = pick_colormap()
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def generate_random_plot(self, rectangle: Rectangle):
|
||||
|
||||
if is_square_like(rectangle):
|
||||
plt_fn = rnd.choice(
|
||||
[
|
||||
self.generate_random_line_plot,
|
||||
self.generate_random_bar_plot,
|
||||
self.generate_random_scatter_plot,
|
||||
self.generate_random_histogram,
|
||||
self.generate_random_pie_chart,
|
||||
self.generate_random_heat_map,
|
||||
self.generate_random_structural_formula,
|
||||
]
|
||||
)
|
||||
elif is_wide(rectangle):
|
||||
plt_fn = rnd.choice(
|
||||
[
|
||||
self.generate_random_line_plot,
|
||||
self.generate_random_histogram,
|
||||
self.generate_random_bar_plot,
|
||||
self.generate_random_structural_formula,
|
||||
]
|
||||
)
|
||||
elif is_tall(rectangle):
|
||||
plt_fn = rnd.choice(
|
||||
[
|
||||
self.generate_random_bar_plot,
|
||||
self.generate_random_histogram,
|
||||
self.generate_random_structural_formula,
|
||||
]
|
||||
)
|
||||
else:
|
||||
plt_fn = self.generate_random_scatter_plot
|
||||
|
||||
plt_fn(rectangle)
|
||||
|
||||
def generate_random_bar_plot(self, rectangle: Rectangle):
|
||||
x = sorted(np.random.randint(low=1, high=11, size=5))
|
||||
y = np.random.randint(low=1, high=11, size=5)
|
||||
bar_fn = partial(
|
||||
plt.bar,
|
||||
log=random.choice([True, False]),
|
||||
)
|
||||
self.__generate_random_plot(bar_fn, rectangle, x, y)
|
||||
|
||||
def generate_random_line_plot(self, rectangle: Rectangle):
|
||||
f = rnd.choice([np.sin, np.cos, np.tan, np.exp, np.log, np.sqrt, np.square])
|
||||
|
||||
x = np.linspace(0, 10, 100)
|
||||
y = f(x)
|
||||
|
||||
plot_fn = partial(
|
||||
plt.plot,
|
||||
)
|
||||
|
||||
self.__generate_random_plot(plot_fn, rectangle, x, y)
|
||||
|
||||
def generate_random_scatter_plot(self, rectangle: Rectangle):
|
||||
n = rnd.randint(10, 40)
|
||||
x = np.random.normal(size=n)
|
||||
y = np.random.normal(size=n)
|
||||
scatter_fn = partial(
|
||||
plt.scatter,
|
||||
cmap=self.cmap,
|
||||
marker=rnd.choice(["o", "*", "+", "x"]),
|
||||
)
|
||||
|
||||
self.__generate_random_plot(scatter_fn, rectangle, x, y)
|
||||
|
||||
def generate_random_histogram(self, rectangle: Rectangle):
|
||||
x = np.random.normal(size=100)
|
||||
hist_fn = partial(
|
||||
plt.hist,
|
||||
orientation=random.choice(["horizontal", "vertical"]),
|
||||
histtype=random.choice(["bar", "barstacked", "step", "stepfilled"]),
|
||||
log=random.choice([True, False]),
|
||||
stacked=random.choice([True, False]),
|
||||
density=random.choice([True, False]),
|
||||
cumulative=random.choice([True, False]),
|
||||
)
|
||||
self.__generate_random_plot(hist_fn, rectangle, x, random.randint(5, 20))
|
||||
|
||||
def generate_random_pie_chart(self, rectangle: Rectangle):
|
||||
|
||||
n = random.randint(3, 7)
|
||||
x = np.random.uniform(size=n)
|
||||
pie_fn = partial(
|
||||
plt.pie,
|
||||
shadow=True,
|
||||
startangle=90,
|
||||
pctdistance=0.85,
|
||||
labeldistance=1.1,
|
||||
colors=self.cmap(np.linspace(0, 1, 10)),
|
||||
)
|
||||
self.__generate_random_plot(
|
||||
pie_fn,
|
||||
rectangle,
|
||||
x,
|
||||
np.random.uniform(0, 0.1, size=n),
|
||||
plot_kwargs=self.generate_plot_kwargs(keywords=["a"]),
|
||||
)
|
||||
|
||||
def generate_random_heat_map(self, rectangle: Rectangle):
|
||||
|
||||
n = random.randint(3, 7)
|
||||
m = random.randint(3, 7)
|
||||
x = np.random.uniform(size=(n, m))
|
||||
heat_map_fn = lambda x, y: plt.imshow(x, interpolation="nearest")
|
||||
self.__generate_random_plot(
|
||||
heat_map_fn,
|
||||
rectangle,
|
||||
x,
|
||||
np.random.uniform(0, 0.1, size=n),
|
||||
plot_kwargs=self.generate_plot_kwargs(keywords=["a"]),
|
||||
)
|
||||
|
||||
def generate_random_structural_formula(self, rectangle: Rectangle):
|
||||
try:
|
||||
image = generate_image_of_structural_formula(size=rectangle.size)
|
||||
self.content = image if not self.content else superimpose(self.content, image)
|
||||
# fallback, since structural formula generation can fail
|
||||
except ValueError:
|
||||
self.generate_random_scatter_plot(rectangle)
|
||||
|
||||
def generate_plot_kwargs(self, keywords=None):
|
||||
|
||||
kwargs = {
|
||||
"color": rnd.choice(self.cmap.colors),
|
||||
"linestyle": rnd.choice(["-", "--", "-.", ":"]),
|
||||
"linewidth": rnd.uniform(1, 4),
|
||||
}
|
||||
|
||||
return kwargs if not keywords else {k: v for k, v in kwargs.items() if k in keywords}
|
||||
|
||||
def __generate_random_plot(self, plot_fn, rectangle: Rectangle, x, y, plot_kwargs=None):
|
||||
|
||||
plot_kwargs = self.generate_plot_kwargs() if plot_kwargs is None else plot_kwargs
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
fig.set_size_inches(rectangle.width / 100, rectangle.height / 100)
|
||||
fig.tight_layout(pad=0)
|
||||
|
||||
plot_fn(x, y, **plot_kwargs)
|
||||
ax.set_facecolor("none")
|
||||
|
||||
probably() and ax.set_title(generate_random_words(1, 3))
|
||||
|
||||
# disable axes at random
|
||||
maybe() and ax.set_xticks([])
|
||||
maybe() and ax.set_yticks([])
|
||||
maybe() and ax.set_xticklabels([])
|
||||
maybe() and ax.set_yticklabels([])
|
||||
maybe() and ax.set_xlabel("")
|
||||
maybe() and ax.set_ylabel("")
|
||||
maybe() and ax.set_title("")
|
||||
maybe() and ax.set_frame_on(False)
|
||||
|
||||
# remove spines at random
|
||||
maybe() and (ax.spines["top"].set_visible(False) or ax.spines["right"].set_visible(False))
|
||||
|
||||
image = dump_plt_to_image(rectangle)
|
||||
assert image.mode == "RGBA"
|
||||
|
||||
self.content = image if not self.content else superimpose(self.content, image)
|
||||
|
||||
return ax, fig
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def pick_colormap() -> ListedColormap:
|
||||
cmap_name = rnd.choice(
|
||||
[
|
||||
"viridis",
|
||||
"plasma",
|
||||
"inferno",
|
||||
"magma",
|
||||
"cividis",
|
||||
],
|
||||
)
|
||||
loguru.logger.info(f"Using colormap {cmap_name}")
|
||||
cmap = plt.get_cmap(cmap_name)
|
||||
return cmap
|
||||
|
||||
|
||||
def dump_plt_to_image(rectangle):
|
||||
buf = io.BytesIO()
|
||||
plt.savefig(buf, format="png", transparent=True)
|
||||
buf.seek(0)
|
||||
image = Image.open(buf)
|
||||
image = image.resize((rectangle.width, rectangle.height))
|
||||
buf.close()
|
||||
plt.close()
|
||||
return image
|
||||
11
synthesis/segment/random_content_rectangle.py
Normal file
@ -0,0 +1,11 @@
|
||||
import random
|
||||
|
||||
from synthesis.randomization import get_random_seed
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
|
||||
|
||||
class RandomContentRectangle(ContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, content=None, seed=None):
|
||||
super().__init__(x1, y1, x2, y2, content)
|
||||
self.seed = seed or get_random_seed()
|
||||
self.random = random.Random(self.seed)
|
||||
14
synthesis/segment/recursive_content_rectangle.py
Normal file
@ -0,0 +1,14 @@
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
|
||||
|
||||
class RecursiveContentRectangle(ContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, content=None):
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
self.content = content
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def accept(self, visitor):
|
||||
return visitor.visit_recursive_content_rectangle(self)
|
||||
102
synthesis/segment/segments.py
Normal file
@ -0,0 +1,102 @@
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.randomization import probably, rnd
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
from synthesis.segment.plot import RandomPlot
|
||||
from synthesis.segment.text_block import TextBlock
|
||||
from synthesis.text.font import pick_random_font_available_on_system
|
||||
from synthesis.text.text_block_generator.caption import CaptionGenerator
|
||||
|
||||
|
||||
def generate_random_plot(rectangle: Rectangle) -> ContentRectangle:
|
||||
block = RandomPlot(*rectangle.coords)
|
||||
block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor
|
||||
block.generate_random_plot(rectangle)
|
||||
return block
|
||||
|
||||
|
||||
def generate_recursive_random_table(rectangle: Rectangle, **kwargs) -> ContentRectangle:
|
||||
from synthesis.segment.table.table import RecursiveRandomTable
|
||||
|
||||
block = RecursiveRandomTable(*rectangle.coords, **kwargs)
|
||||
if isinstance(rectangle, RecursiveRandomTable):
|
||||
block.content = rectangle.content if rectangle.content else None # TODO: Refactor
|
||||
block.generate_random_table()
|
||||
return block
|
||||
|
||||
|
||||
def generate_text_block(rectangle: Rectangle, text) -> ContentRectangle:
|
||||
block = TextBlock(
|
||||
*rectangle.coords,
|
||||
font=pick_random_font_available_on_system(
|
||||
includes=("serif", "sans-serif", "bold"),
|
||||
excludes=("mono", "italic", "oblique", "cursive"),
|
||||
),
|
||||
font_size=30, # TODO: De-hardcode font size... Seems to have no effect on top of that
|
||||
)
|
||||
block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor
|
||||
block.put_text(text, rectangle)
|
||||
return block
|
||||
|
||||
|
||||
def generate_random_plot_with_caption(rectangle: Rectangle):
|
||||
# TODO: deduplicate with generate_random_table_with_caption
|
||||
plot_box, caption_box = split_into_figure_and_caption(rectangle)
|
||||
plot_box = generate_random_plot(plot_box)
|
||||
caption_box = generate_random_image_caption(caption_box)
|
||||
return plot_box, caption_box
|
||||
|
||||
|
||||
def generate_recursive_random_table_with_caption(rectangle: Rectangle):
|
||||
table_box, caption_box = split_into_figure_and_caption(rectangle)
|
||||
table_box = generate_recursive_random_table(table_box, double_rule=probably())
|
||||
caption_box = generate_random_table_caption(caption_box)
|
||||
return table_box, caption_box
|
||||
|
||||
|
||||
def split_into_figure_and_caption(rectangle: Rectangle):
|
||||
gap_percentage = rnd.uniform(0, 0.03)
|
||||
split_point = rnd.uniform(0.5, 0.9)
|
||||
figure_box = Rectangle(
|
||||
rectangle.x1, rectangle.y1, rectangle.x2, rectangle.y1 + rectangle.height * (split_point - gap_percentage / 2)
|
||||
)
|
||||
caption_box = Rectangle(
|
||||
rectangle.x1, rectangle.y1 + rectangle.height * (split_point + gap_percentage / 2), rectangle.x2, rectangle.y2
|
||||
)
|
||||
return figure_box, caption_box
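# Illustrative numbers (assumptions, since the split point and gap are randomized above):
# for a 1000px-tall box with split_point=0.8 and gap_percentage=0.02, the figure box
# covers roughly the top 790px and the caption box starts at roughly 810px.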
|
||||
|
||||
|
||||
def generate_random_text_block(rectangle: Rectangle, n_sentences=None) -> ContentRectangle:
|
||||
block = TextBlock(
|
||||
*rectangle.coords,
|
||||
font=pick_random_font_available_on_system(
|
||||
includes=("serif", "sans-serif"),
|
||||
excludes=("bold", "mono", "italic", "oblique", "cursive"),
|
||||
),
|
||||
font_size=30, # TODO: De-hardcode font size... Seems to have no effect on top of that
|
||||
)
|
||||
block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor
|
||||
block.generate_random_text(rectangle, n_sentences)
|
||||
return block
|
||||
|
||||
|
||||
def generate_random_image_caption(rectangle: Rectangle) -> ContentRectangle:
|
||||
return generate_random_caption(rectangle, f"Fig. {rnd.randint(1, 20)}")
|
||||
|
||||
|
||||
def generate_random_table_caption(rectangle: Rectangle) -> ContentRectangle:
|
||||
return generate_random_caption(rectangle, f"Tabl. {rnd.randint(1, 20)}")
|
||||
|
||||
|
||||
def generate_random_caption(rectangle: Rectangle, caption_start, n_sentences=None) -> ContentRectangle:
|
||||
block = TextBlock(
|
||||
*rectangle.coords,
|
||||
text_generator=CaptionGenerator(caption_start=caption_start),
|
||||
font=pick_random_font_available_on_system(
|
||||
includes=("italic",),
|
||||
excludes=("bold", "mono"),
|
||||
),
|
||||
font_size=100, # TODO: De-hardcode font size... Seems to have no effect on top of that
|
||||
)
|
||||
block.content = rectangle.content if isinstance(rectangle, ContentRectangle) else None # TODO: Refactor
|
||||
block.generate_random_text(rectangle, n_sentences)
|
||||
return block
|
||||
0
synthesis/segment/table/__init__.py
Normal file
84
synthesis/segment/table/cell.py
Normal file
@ -0,0 +1,84 @@
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
from cv_analysis.utils.image_operations import superimpose
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
|
||||
|
||||
class Cell(ContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, color=None):
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
|
||||
self.background_color = color or (255, 255, 255, 0)
|
||||
|
||||
# to debug use random border color: tuple([random.randint(100, 200) for _ in range(3)] + [255])
|
||||
self.cell_border_color = (0, 0, 0, 255)
|
||||
|
||||
self.border_width = 1
|
||||
self.inset = 1
|
||||
|
||||
self.content = Image.new("RGBA", (self.width, self.height))
|
||||
self.fill()
|
||||
|
||||
def has_child_boxes(self):
|
||||
return False
|
||||
|
||||
def draw_top_border(self, width=None):
|
||||
self.draw_line((0, 0, self.width - self.inset, 0), width=width)
|
||||
return self
|
||||
|
||||
def draw_bottom_border(self, width=None):
|
||||
self.draw_line((0, self.height - self.inset, self.width - self.inset, self.height - self.inset), width=width)
|
||||
return self
|
||||
|
||||
def draw_left_border(self, width=None):
|
||||
self.draw_line((0, 0, 0, self.height), width=width)
|
||||
return self
|
||||
|
||||
def draw_right_border(self, width=None):
|
||||
self.draw_line((self.width - self.inset, 0, self.width - self.inset, self.height), width=width)
|
||||
return self
|
||||
|
||||
def draw_line(self, points, width=None):
|
||||
width = width or self.border_width
|
||||
draw = ImageDraw.Draw(self.content)
|
||||
draw.line(points, width=width, fill=self.cell_border_color)
|
||||
return self
|
||||
|
||||
def draw(self, width=None):
|
||||
self.draw_top_border(width=width)
|
||||
self.draw_bottom_border(width=width)
|
||||
self.draw_left_border(width=width)
|
||||
self.draw_right_border(width=width)
|
||||
return self
|
||||
|
||||
def draw_top_left_corner(self, width=None):
|
||||
self.draw_line((0, 0, 0, 0), width=width)
|
||||
self.draw_line((0, 0, 0, 0), width=width)
|
||||
return self
|
||||
|
||||
def draw_top_right_corner(self, width=None):
|
||||
self.draw_line((self.width - self.inset, 0, self.width - self.inset, 0), width=width)
|
||||
self.draw_line((self.width - self.inset, 0, self.width - self.inset, 0), width=width)
|
||||
return self
|
||||
|
||||
def draw_bottom_left_corner(self, width=None):
|
||||
self.draw_line((0, self.height - self.inset, 0, self.height - self.inset), width=width)
|
||||
self.draw_line((0, self.height - self.inset, 0, self.height - self.inset), width=width)
|
||||
return self
|
||||
|
||||
def draw_bottom_right_corner(self, width=None):
|
||||
self.draw_line(
|
||||
(self.width - self.inset, self.height - self.inset, self.width - self.inset, self.height - self.inset),
|
||||
width=width,
|
||||
)
|
||||
self.draw_line(
|
||||
(self.width - self.inset, self.height - self.inset, self.width - self.inset, self.height - self.inset),
|
||||
width=width,
|
||||
)
|
||||
return self
|
||||
|
||||
def fill(self, color=None):
|
||||
color = color or self.background_color
|
||||
image = Image.new("RGBA", (self.width, self.height), color=color)
|
||||
self.content = superimpose(image, self.content)
|
||||
return self
|
||||
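A minimal sketch of how a Cell can be composed onto a page (imports as above; the coordinates and page size are arbitrary):

cell = Cell(0, 0, 120, 40)                            # transparent background by default
cell.draw_top_border().draw_bottom_border(width=2)    # border methods chain, each returns self
page = Image.new("RGBA", (600, 800), (255, 255, 255, 255))
page.paste(cell.content, (40, 100), cell.content)     # cell.content is an RGBA PIL image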
316
synthesis/segment/table/table.py
Normal file
@ -0,0 +1,316 @@
|
||||
import random
|
||||
from copy import deepcopy
|
||||
from enum import Enum
|
||||
from functools import lru_cache, partial
|
||||
from math import sqrt
|
||||
from typing import List, Iterable
|
||||
|
||||
from PIL import Image
|
||||
from funcy import chunks, mapcat, repeatedly
|
||||
from loguru import logger
|
||||
|
||||
from cv_analysis.utils.geometric import is_square_like
|
||||
from cv_analysis.utils.image_operations import superimpose
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from cv_analysis.utils.spacial import area
|
||||
from synthesis.randomization import rnd, possibly, maybe
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
from synthesis.segment.plot import pick_colormap
|
||||
from synthesis.segment.random_content_rectangle import RandomContentRectangle
|
||||
from synthesis.segment.recursive_content_rectangle import RecursiveContentRectangle
|
||||
from synthesis.segment.segments import generate_random_plot, generate_recursive_random_table, generate_text_block
|
||||
from synthesis.segment.table.cell import Cell
|
||||
from synthesis.text.text import generate_random_words, generate_random_number
|
||||
|
||||
|
||||
class RecursiveRandomTable(RandomContentRectangle, RecursiveContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, border_width=1, layout: str = None, double_rule=False):
|
||||
"""A table with a random number of rows and columns, and random content in each cell.
|
||||
|
||||
Args:
|
||||
x1: x-coordinate of the top-left corner
|
||||
y1: y-coordinate of the top-left corner
|
||||
x2: x-coordinate of the bottom-right corner
|
||||
y2: y-coordinate of the bottom-right corner
|
||||
border_width: width of the table border
|
||||
layout: layout of the table, either "horizontal", "vertical", "closed", or "open"
|
||||
double_rule: whether to use double rules as the top and bottom rules
|
||||
"""
|
||||
|
||||
assert layout in [None, "horizontal", "vertical", "closed", "open"]
|
||||
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
|
||||
self.double_rule = double_rule
|
||||
self.double_rule_width = (3 * border_width) if self.double_rule else 0
|
||||
|
||||
self.n_columns = rnd.randint(1, max(self.width // 100, 1))
|
||||
self.n_rows = rnd.randint(1, max((self.height - 2 * self.double_rule_width) // rnd.randint(17, 100), 1))
|
||||
self.cell_size = (self.width / self.n_columns, (self.height - 2 * self.double_rule_width) / self.n_rows)
|
||||
|
||||
self.content = Image.new("RGBA", (self.width, self.height), (255, 255, 255, 0))
|
||||
|
||||
self.background_color = get_random_background_color() if maybe() else (255, 255, 255, 0)
|
||||
|
||||
self.layout = layout or self.pick_random_layout()
|
||||
logger.debug(f"Layout: {self.layout}")
|
||||
|
||||
self.__cells = []
|
||||
|
||||
@property
|
||||
def cells(self):
|
||||
return self.__cells
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
for cell in self.cells:
|
||||
# TODO: this is not very clean
|
||||
cell = deepcopy(cell)
|
||||
cell.shift(self.x1, self.y1)
|
||||
yield cell
|
||||
|
||||
def pick_random_layout(self):
|
||||
|
||||
if self.n_columns == 1 and self.n_rows == 1:
|
||||
layout = "closed"
|
||||
elif self.n_columns == 1:
|
||||
layout = rnd.choice(["vertical", "closed"])
|
||||
elif self.n_rows == 1:
|
||||
layout = rnd.choice(["horizontal", "closed"])
|
||||
else:
|
||||
layout = rnd.choice(["closed", "horizontal", "vertical", "open"])
|
||||
|
||||
return layout
|
||||
|
||||
def generate_random_table(self, draw_cell_content=False):
|
||||
"""Generate a random table. The table is generated by first generating a random layout, and then filling the
|
||||
cells with content recursively.
|
||||
|
||||
Args:
|
||||
draw_cell_content: Whether to draw the content of each cell. If False, only the table border is drawn. Cells
|
||||
can be accessed and drawn later.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
cells = self.generate_table()
|
||||
cells = list(self.fill_cells_with_content(cells))
|
||||
cells = list(self.draw_cell_borders(cells))
|
||||
|
||||
# TODO: This is not very clean.
|
||||
if draw_cell_content:
|
||||
self.content = paste_contents(self.content, cells)
|
||||
|
||||
assert self.content.mode == "RGBA"
|
||||
|
||||
self.__cells.extend(cells)
|
||||
|
||||
def fill_cells_with_content(self, cells):
|
||||
yield from map(self.build_cell, cells)
|
||||
|
||||
def build_cell(self, cell):
|
||||
|
||||
if self.__is_a_small_cell(cell):
|
||||
cell = self.build_small_cell(cell)
|
||||
|
||||
elif self.__is_a_medium_sized_cell(cell):
|
||||
cell = self.build_medium_sized_cell(cell)
|
||||
|
||||
elif self.__is_a_large_cell(cell):
|
||||
cell = self.build_large_cell(cell)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Invalid cell size: {get_size(cell)}")
|
||||
|
||||
assert cell.content.mode == "RGBA"
|
||||
|
||||
return cell
|
||||
|
||||
def __is_a_small_cell(self, cell):
|
||||
return get_size(cell) <= Size.SMALL.value
|
||||
|
||||
def __is_a_medium_sized_cell(self, cell):
|
||||
return get_size(cell) <= Size.MEDIUM.value
|
||||
|
||||
def __is_a_large_cell(self, cell):
|
||||
return get_size(cell) > Size.MEDIUM.value
|
||||
|
||||
def build_small_cell(self, cell):
|
||||
|
||||
content = (possibly() and generate_random_words(1, 3)) or (
|
||||
generate_random_number()
|
||||
+ ((possibly() and " " + rnd.choice(["$", "£", "%", "EUR", "USD", "CAD", "ADA"])) or "")
|
||||
)
|
||||
|
||||
return generate_text_block(cell, content)
|
||||
|
||||
def build_medium_sized_cell(self, cell):
|
||||
|
||||
choice = rnd.choice(["plot", "recurse"])
|
||||
|
||||
if choice == "plot":
|
||||
return generate_random_plot(cell)
|
||||
|
||||
elif choice == "recurse":
|
||||
return generate_recursive_random_table(
|
||||
cell,
|
||||
border_width=1,
|
||||
layout=random.choice(["open", "horizontal", "vertical"]),
|
||||
double_rule=False,
|
||||
)
|
||||
|
||||
else:
|
||||
return generate_text_block(cell, f"{choice} {get_size(cell):.0f} {get_size_class(cell).name}")
|
||||
|
||||
def build_large_cell(self, cell):
|
||||
choice = rnd.choice(["plot", "recurse"])
|
||||
|
||||
logger.debug(f"Generating {choice} {get_size(cell):.0f} {get_size_class(cell).name}")
|
||||
|
||||
if choice == "plot" and is_square_like(cell):
|
||||
return generate_random_plot(cell)
|
||||
|
||||
else:
|
||||
logger.debug(f"recurse {get_size(cell):.0f} {get_size_class(cell).name}")
|
||||
return generate_recursive_random_table(
|
||||
cell,
|
||||
border_width=1,
|
||||
layout=random.choice(["open", "horizontal", "vertical"]),
|
||||
double_rule=False,
|
||||
)
|
||||
|
||||
def draw_cell_borders(self, cells: List[ContentRectangle]):
|
||||
|
||||
columns = chunks(self.n_rows, cells)
|
||||
|
||||
for col_idx, column in enumerate(columns):
|
||||
for row_index, cell in enumerate(column):
|
||||
self.draw_cell(cell, col_idx, row_index)
|
||||
yield cell
|
||||
|
||||
if self.layout == "closed":
|
||||
self.draw_table_borders()
|
||||
|
||||
if self.double_rule:
|
||||
self.draw_table_rule()
|
||||
|
||||
def draw_cell(self, cell, col_idx, row_index):
|
||||
# TODO: Refactor
|
||||
c = Cell(*cell.coords, self.background_color)
|
||||
c.content = cell.content
|
||||
self.draw_edges_based_on_position(c, col_idx, row_index)
|
||||
|
||||
def draw_edges_based_on_position(self, cell: Cell, col_idx, row_index):
|
||||
"""Draw the edges of a cell based on its position in the table."""
|
||||
if col_idx < self.n_columns - 1:
|
||||
cell.draw_right_border()
|
||||
|
||||
if row_index < self.n_rows - 1:
|
||||
cell.draw_bottom_border()
|
||||
|
||||
def draw_table_rule(self):
|
||||
# TODO: Refactor
|
||||
c1 = Cell(*self.coords)
|
||||
c1.draw_top_border(width=1)
|
||||
c1.draw_bottom_border(width=1)
|
||||
|
||||
x1, y1, x2, y2 = self.coords
|
||||
c2 = Cell(x1, y1 + self.double_rule_width, x2, y2 - self.double_rule_width)
|
||||
c2.draw_top_border(width=1)
|
||||
c2.draw_bottom_border(width=1)
|
||||
|
||||
c = superimpose(c1.content, c2.content)
|
||||
|
||||
self.content = superimpose(c, self.content)
|
||||
|
||||
def draw_table_borders(self):
|
||||
# TODO: Refactor
|
||||
c = Cell(*self.coords, self.background_color)
|
||||
c.content = self.content
|
||||
c.draw()
|
||||
return self
|
||||
|
||||
def generate_table(self) -> Iterable[ContentRectangle]:
|
||||
yield from mapcat(self.generate_column, range(self.n_columns))
|
||||
|
||||
def generate_column(self, column_index) -> Iterable[ContentRectangle]:
|
||||
logger.trace(f"Generating column {column_index}.")
|
||||
generate_cell_for_row_index = partial(self.generate_cell, column_index)
|
||||
yield from map(generate_cell_for_row_index, range(self.n_rows))
|
||||
|
||||
def generate_cell(self, column_index, row_index) -> ContentRectangle:
|
||||
w, h = self.cell_size
|
||||
x1, y1 = (column_index * w), (row_index * h) + self.double_rule_width
|
||||
x2, y2 = x1 + w, y1 + h
|
||||
logger.trace(f"Generating cell ({row_index}, {column_index}) at ({x1}, {y1}, {x2}, {y2}).")
|
||||
return Cell(x1, y1, x2, y2, self.background_color)
|
||||
|
||||
def generate_column_names(self):
|
||||
column_names = repeatedly(self.generate_column_name, self.n_columns)
|
||||
return column_names
|
||||
|
||||
def generate_column_name(self):
|
||||
column_name = generate_random_words(1, 3)
|
||||
return column_name
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_random_background_color():
|
||||
return tuple([*get_random_color_complementing_color_map(pick_colormap()), rnd.randint(100, 210)])
|
||||
|
||||
|
||||
def get_random_color_complementing_color_map(colormap):
|
||||
def color_complement(r, g, b):
|
||||
"""Reference: https://stackoverflow.com/a/40234924"""
|
||||
|
||||
def hilo(a, b, c):
|
||||
if c < b:
|
||||
b, c = c, b
|
||||
if b < a:
|
||||
a, b = b, a
|
||||
if c < b:
|
||||
b, c = c, b
|
||||
return a + c
|
||||
|
||||
k = hilo(r, g, b)
|
||||
return tuple(k - u for u in (r, g, b))
|
||||
|
||||
color = colormap(0.2)[:3]
|
||||
color = [int(255 * v) for v in color]
|
||||
color = color_complement(*color)
|
||||
return color
|
||||
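A quick worked example of the complement (illustrative values): for an RGB sample (10, 180, 230), hilo returns min + max = 10 + 230 = 240, so the complement is (240 - 10, 240 - 180, 240 - 230) = (230, 60, 10).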
|
||||
|
||||
def paste_contents(page, contents: Iterable[ContentRectangle]):
|
||||
page = deepcopy(page)
|
||||
for content in contents:
|
||||
paste_content(page, content)
|
||||
return page
|
||||
|
||||
|
||||
def paste_content(page, content_box: ContentRectangle):
|
||||
assert content_box.content.mode == "RGBA"
|
||||
page.paste(content_box.content, (content_box.x1, content_box.y1), content_box.content)
|
||||
return page
|
||||
|
||||
|
||||
def get_size_class(rectangle: Rectangle):
|
||||
size = get_size(rectangle)
|
||||
if size < Size.SMALL.value:
|
||||
return Size.SMALL
|
||||
elif size < Size.LARGE.value:
|
||||
return Size.MEDIUM
|
||||
else:
|
||||
return Size.LARGE
|
||||
|
||||
|
||||
def get_size(rectangle: Rectangle):
|
||||
size = sqrt(area(rectangle))
|
||||
return size
|
||||
|
||||
|
||||
class Size(Enum):
|
||||
# FIXME: this has to scale with the DPI
|
||||
SMALL = 120
|
||||
MEDIUM = 180
|
||||
LARGE = 300
|
||||
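A minimal usage sketch for the class above (sizes are arbitrary and the output path is hypothetical; every run differs because layout, cell count, and content are randomized):

table = RecursiveRandomTable(0, 0, 600, 400, layout="closed", double_rule=True)
table.generate_random_table(draw_cell_content=True)   # renders borders and cell content into table.content
table.content.save("random_table.png")                # table.content is an RGBA PIL image; table.cells holds the cell boxes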
77
synthesis/segment/text_block.py
Normal file
@ -0,0 +1,77 @@
|
||||
from math import ceil
|
||||
from typing import List
|
||||
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from funcy import first, take
|
||||
|
||||
from cv_analysis.logging import dev_logger, debug_log
|
||||
from cv_analysis.utils.image_operations import superimpose
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.segment.content_rectangle import ContentRectangle
|
||||
from synthesis.text.font import pick_random_mono_space_font_available_on_system
|
||||
from synthesis.text.text_block_generator.paragraph import ParagraphGenerator
|
||||
|
||||
|
||||
class TextBlock(ContentRectangle):
|
||||
def __init__(self, x1, y1, x2, y2, text_generator=None, font=None, font_size=None):
|
||||
dev_logger.trace(f"Creating text block at {x1, y1, x2, y2}.")
|
||||
super().__init__(x1, y1, x2, y2)
|
||||
self.font = font or ImageFont.load_default() # pick_random_font_available_on_system(size=font_size)
|
||||
self.text_generator = text_generator or ParagraphGenerator()
|
||||
|
||||
@debug_log()
|
||||
def __call__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@debug_log()
|
||||
def generate_random_text(self, rectangle: Rectangle, n_sentences=None):
|
||||
lines = self.text_generator(rectangle, n_sentences)
|
||||
image = write_lines_to_image(lines, rectangle, self.font)
|
||||
return self.__put_content(image)
|
||||
|
||||
@debug_log()
|
||||
def put_text(self, text: str, rectangle: Rectangle):
|
||||
|
||||
text_width, text_height = self.font.getsize(text)
|
||||
|
||||
width_delta = text_width - rectangle.width
|
||||
height_delta = text_height - rectangle.height
|
||||
|
||||
image = Image.new("RGBA", (text_width, text_height), (0, 255, 255, 0))
|
||||
|
||||
if width_delta > 0 or height_delta > 0:
|
||||
image = image.resize((int(rectangle.width * 0.9), text_height))
|
||||
|
||||
draw = ImageDraw.Draw(image)
|
||||
draw.text((0, 0), text, font=self.font, fill=(0, 0, 0, 255))
|
||||
return self.__put_content(image)
|
||||
|
||||
@debug_log()
|
||||
def __put_content(self, image: Image.Image):
|
||||
self.content = image if not self.content else superimpose(self.content, image)
|
||||
assert self.content.mode == "RGBA"
|
||||
return self
|
||||
|
||||
|
||||
@debug_log()
|
||||
def write_lines_to_image(lines: List[str], rectangle: Rectangle, font=None) -> Image.Image:
|
||||
@debug_log()
|
||||
def write_line(line, line_number):
|
||||
draw.text((0, line_number * text_size), line, font=font, fill=(0, 0, 0, 255))
|
||||
|
||||
font = font or pick_random_mono_space_font_available_on_system()
|
||||
|
||||
image = Image.new("RGBA", (rectangle.width, rectangle.height), (0, 255, 255, 0))
|
||||
draw = ImageDraw.Draw(image)
|
||||
text_size = draw.textsize(first(lines), font=font)[1]
|
||||
|
||||
# FIXME: This will sometimes cut off the last line, but it's better than underflowing, since then we would need to
|
||||
# crop the target box to fit the text, and to the detection algorithm a cut off line makes little difference
|
||||
# anyway. At least for now. If it should become relevant, we need to find a clean way to fit target box and text
|
||||
# precisely.
|
||||
n_lines = min(len(lines), ceil(rectangle.height / text_size))
|
||||
|
||||
for line_number, line in enumerate(take(n_lines, lines)):
|
||||
write_line(line, line_number)
|
||||
|
||||
return image
|
||||
0
synthesis/text/__init__.py
Normal file
105
synthesis/text/font.py
Normal file
@ -0,0 +1,105 @@
|
||||
import itertools
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from funcy import lmap, complement, keep, first, lzip, omit, project
|
||||
from loguru import logger
|
||||
|
||||
from synthesis.randomization import rnd
|
||||
|
||||
|
||||
class RandomFontPicker:
|
||||
def __init__(self, font_dir=None, return_default_font=False):
|
||||
fonts = get_fonts(font_dir)
|
||||
fonts_lower = [font.lower() for font in fonts]
|
||||
domestic_fonts_mask = lmap(complement(self.looks_foreign), fonts_lower)
|
||||
self.fonts = list(itertools.compress(fonts, domestic_fonts_mask))
|
||||
self.fonts_lower = list(itertools.compress(fonts_lower, domestic_fonts_mask))
|
||||
|
||||
self.test_image = Image.new("RGB", (200, 200), (255, 255, 255))
|
||||
self.draw = ImageDraw.Draw(self.test_image)
|
||||
self.return_default_font = return_default_font
|
||||
|
||||
def looks_foreign(self, font):
|
||||
# This filters out foreign fonts (e.g. 'Noto Serif Malayalam')
|
||||
return len(font.split("-")[0]) > 10
|
||||
|
||||
def pick_random_font_available_on_system(self, includes=None, excludes=None) -> ImageFont: # FIXME: Slow!
|
||||
|
||||
if self.return_default_font:
|
||||
return ImageFont.load_default()
|
||||
|
||||
includes = [i.lower() for i in includes] if includes else []
|
||||
excludes = [i.lower() for i in excludes] if excludes else []
|
||||
|
||||
logger.debug(f"Picking font by includes={includes} and excludes={excludes}.")
|
||||
|
||||
def includes_pattern(font):
|
||||
return not includes or any(include in font for include in includes)
|
||||
|
||||
def excludes_pattern(font):
|
||||
return not excludes or not any(exclude in font for exclude in excludes)
|
||||
|
||||
self.shuffle_fonts()
|
||||
|
||||
mask = lmap(lambda f: includes_pattern(f) and excludes_pattern(f), self.fonts_lower)
|
||||
fonts = itertools.compress(self.fonts, mask)
|
||||
fonts = keep(map(self.load_font, fonts))
|
||||
# fonts = filter(self.font_is_renderable, fonts) # FIXME: this does not work
|
||||
|
||||
font = first(fonts)
|
||||
logger.info(f"Using font: {font.getname()}")
|
||||
return font
|
||||
|
||||
def shuffle_fonts(self):
|
||||
l = lzip(self.fonts, self.fonts_lower)
|
||||
rnd.shuffle(l)
|
||||
self.fonts, self.fonts_lower = lzip(*l)
|
||||
|
||||
def pick_random_mono_space_font_available_on_system(self) -> ImageFont:
|
||||
return self.pick_random_font_available_on_system(includes=["mono"], excludes=["oblique"])
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def load_font(self, font: str):
|
||||
logger.trace(f"Loading font: {font}")
|
||||
try:
|
||||
return ImageFont.truetype(font, size=11)
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def font_is_renderable(self, font):
|
||||
text_size = self.draw.textsize("Test String", font=font)
|
||||
return text_size[0] > 0 and text_size[1] > 0
|
||||
|
||||
|
||||
def get_fonts(path: Path = None) -> List[str]:
|
||||
path = path or Path("/usr/share/fonts")
|
||||
fonts = list(path.rglob("*.ttf"))
|
||||
fonts = [font.name for font in fonts]
|
||||
return fonts
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def get_font_picker(**kwargs):
|
||||
return_default_font = kwargs.pop("return_default_font", False)
|
||||
return RandomFontPicker(**kwargs, return_default_font=return_default_font)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def pick_random_mono_space_font_available_on_system(**kwargs):
|
||||
font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
|
||||
return font_picker.pick_random_mono_space_font_available_on_system()
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def pick_random_font_available_on_system(**kwargs):
|
||||
kwargs["excludes"] = (
|
||||
*kwargs.get("excludes", {}),
|
||||
"Kinnari",
|
||||
"KacstOne",
|
||||
)
|
||||
font_picker = get_font_picker(**omit(kwargs, ["includes", "excludes"]))
|
||||
return font_picker.pick_random_font_available_on_system(**project(kwargs, ["includes", "excludes"]))
|
||||
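For orientation, the module-level helpers above are used elsewhere in this diff roughly like this; include/exclude terms are matched as lowercase substrings of the font file names:

serif_font = pick_random_font_available_on_system(includes=("serif",), excludes=("bold", "mono", "italic"))
mono_font = pick_random_mono_space_font_available_on_system()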
0
synthesis/text/line_formatter/__init__.py
Normal file
9
synthesis/text/line_formatter/identity.py
Normal file
@ -0,0 +1,9 @@
|
||||
from synthesis.text.line_formatter.line_formatter import LineFormatter
|
||||
|
||||
|
||||
class IdentityLineFormatter(LineFormatter):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, lines, last_full):
|
||||
return lines, last_full
|
||||
5
synthesis/text/line_formatter/line_formatter.py
Normal file
@ -0,0 +1,5 @@
|
||||
import abc
|
||||
|
||||
|
||||
class LineFormatter(abc.ABC):
|
||||
pass
|
||||
41
synthesis/text/line_formatter/paragraph.py
Normal file
@ -0,0 +1,41 @@
|
||||
from funcy import identity, compose, first, juxt, rest, rcompose
|
||||
|
||||
from cv_analysis.utils import star, rconj
|
||||
from synthesis.randomization import rnd
|
||||
from synthesis.text.line_formatter.line_formatter import LineFormatter
|
||||
|
||||
|
||||
class ParagraphLineFormatter(LineFormatter):
|
||||
def __init__(self, blank_line_percentage=None):
|
||||
self.blank_line_percentage = blank_line_percentage or rnd.uniform(0, 0.5)
|
||||
|
||||
def __call__(self, lines, last_full):
|
||||
return self.format_lines(lines, last_full)
|
||||
|
||||
def format_lines(self, lines, last_full):
|
||||
def truncate_current_line():
|
||||
return rnd.random() < self.blank_line_percentage and last_full
|
||||
|
||||
# This is meant to be read from the bottom up.
|
||||
current_line_shall_not_be_a_full_line = truncate_current_line()
|
||||
line_formatter = self.truncate_line if current_line_shall_not_be_a_full_line else identity
|
||||
format_current_line = compose(line_formatter, first)
|
||||
move_current_line_to_back = star(rconj)
|
||||
split_first_line_from_lines_and_format_the_former = juxt(rest, format_current_line)
|
||||
split_off_current_line_then_format_it_then_move_it_to_the_back = rcompose(
|
||||
split_first_line_from_lines_and_format_the_former,
|
||||
move_current_line_to_back,
|
||||
)
|
||||
current_line_is_a_full_line = not current_line_shall_not_be_a_full_line
|
||||
# Start reading here and move up.
|
||||
return split_off_current_line_then_format_it_then_move_it_to_the_back(lines), current_line_is_a_full_line
|
||||
|
||||
def format_line(self, line, full=True):
|
||||
line = self.truncate_line(line) if not full else line
|
||||
return line, full
|
||||
|
||||
def truncate_line(self, line: str):
|
||||
n_trailing_words = rnd.randint(0, 4)
|
||||
line = " ".join(line.split()[-n_trailing_words - 1 : -1]).replace(".", "")
|
||||
line = line + ".\n" if line else line
|
||||
return line
|
||||
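A concrete illustration of a single formatter call (a sketch: whether the current line gets truncated is random, and a truncation keeps only a few trailing words):

formatter = ParagraphLineFormatter(blank_line_percentage=0.3)
lines = ["first line of text.", "second line of text.", "third line of text."]
new_lines, last_full = formatter(lines, True)
# One call takes the first line, possibly truncates it to end a "paragraph",
# and moves it to the back, e.g.:
#   new_lines = ["second line of text.", "third line of text.", "first line of.\n"]
#   last_full = False   # so the next call will not truncate again
# Repeated calls rotate through the remaining lines the same way.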
26
synthesis/text/text.py
Normal file
@ -0,0 +1,26 @@
|
||||
import random
|
||||
|
||||
from faker import Faker
|
||||
|
||||
from synthesis.randomization import rnd
|
||||
|
||||
|
||||
def generate_random_words(n_min, n_max):
|
||||
words = " ".join(Faker().words(rnd.randint(n_min, n_max)))
|
||||
return words
|
||||
|
||||
|
||||
def generate_random_number():
|
||||
return str(
|
||||
round(
|
||||
random.choice(
|
||||
[
|
||||
random.randint(-10000, 10000),
|
||||
random.uniform(-100, 100),
|
||||
]
|
||||
),
|
||||
random.choice(
|
||||
[0, 1, 2, 3],
|
||||
),
|
||||
)
|
||||
)
|
||||
0
synthesis/text/text_block_generator/__init__.py
Normal file
22
synthesis/text/text_block_generator/caption.py
Normal file
@ -0,0 +1,22 @@
|
||||
from funcy import first, rest
|
||||
|
||||
from cv_analysis.utils import conj
|
||||
from synthesis.randomization import rnd
|
||||
from synthesis.text.text_block_generator.paragraph import generate_random_text_lines
|
||||
from synthesis.text.text_block_generator.text_block_generator import TextBlockGenerator
|
||||
from synthesis.text.line_formatter.identity import IdentityLineFormatter
|
||||
|
||||
|
||||
class CaptionGenerator(TextBlockGenerator):
|
||||
def __init__(self, caption_start=None):
|
||||
self.line_formatter = IdentityLineFormatter()
|
||||
self.caption_start = caption_start or f"Fig {rnd.randint(1, 20)}"
|
||||
|
||||
def __call__(self, rectangle, n_sentences):
|
||||
return self.generate_paragraph(rectangle, n_sentences)
|
||||
|
||||
def generate_paragraph(self, rectangle, n_sentences):
|
||||
lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
|
||||
first_line_modified = f"{self.caption_start}: {first(lines)}"
|
||||
lines = conj(first_line_modified, rest(lines))
|
||||
return lines
|
||||
53
synthesis/text/text_block_generator/paragraph.py
Normal file
@ -0,0 +1,53 @@
|
||||
import textwrap
|
||||
from typing import List
|
||||
|
||||
from faker import Faker
|
||||
from funcy import iterate, take, last
|
||||
|
||||
from cv_analysis.logging import debug_log
|
||||
from cv_analysis.utils import star
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.randomization import rnd
|
||||
from synthesis.text.line_formatter.identity import IdentityLineFormatter
|
||||
from synthesis.text.line_formatter.line_formatter import LineFormatter
|
||||
from synthesis.text.line_formatter.paragraph import ParagraphLineFormatter
|
||||
from synthesis.text.text_block_generator.text_block_generator import TextBlockGenerator
|
||||
|
||||
|
||||
class ParagraphGenerator(TextBlockGenerator):
|
||||
def __init__(self):
|
||||
self.line_formatter = ParagraphLineFormatter(blank_line_percentage=rnd.uniform(0, 0.5))
|
||||
|
||||
@debug_log(exits=False)
|
||||
def __call__(self, rectangle, n_sentences=None):
|
||||
return self.generate_paragraph(rectangle, n_sentences)
|
||||
|
||||
@debug_log(exits=False)
|
||||
def generate_paragraph(self, rectangle, n_sentences=None):
|
||||
lines = generate_random_text_lines(rectangle, self.line_formatter, n_sentences)
|
||||
return lines
|
||||
|
||||
|
||||
@debug_log(exits=False)
|
||||
def generate_random_text_lines(
|
||||
rectangle: Rectangle,
|
||||
line_formatter: LineFormatter = None,
|
||||
n_sentences=None,
|
||||
) -> List[str]:
|
||||
n_sentences = n_sentences or 3000 # TODO: De-hardcode.
|
||||
line_formatter = line_formatter or IdentityLineFormatter()
|
||||
|
||||
text = Faker().paragraph(nb_sentences=n_sentences, variable_nb_sentences=False, ext_word_list=None)
|
||||
unformatted_lines = textwrap.wrap(
|
||||
text,
|
||||
width=rectangle.width // rnd.uniform(4, 5), # TODO: De-hardcode.
|
||||
break_long_words=True,
|
||||
)
|
||||
# each iteration of the line formatter function formats one more line and adds it to the back of the list
|
||||
formatted_lines_generator = iterate(star(line_formatter), (unformatted_lines, True))
|
||||
# hence do as many iterations as there are lines in the rectangle
|
||||
lines_per_iteration = take(len(unformatted_lines), formatted_lines_generator)
|
||||
# and then take the lines from the last iteration of the function
|
||||
formatted_lines, _ = last(lines_per_iteration)
|
||||
|
||||
return formatted_lines
|
||||
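Note that funcy's iterate yields the seed first, so take(len(unformatted_lines), ...) applies the formatter len(unformatted_lines) - 1 times; the pipeline above amounts to this plainer loop (a sketch for orientation, not part of the diff):

state = (unformatted_lines, True)
for _ in range(len(unformatted_lines) - 1):
    state = line_formatter(*state)    # formats one line and rotates it to the back of the list
formatted_lines, _ = state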
5
synthesis/text/text_block_generator/text_block_generator.py
Normal file
@ -0,0 +1,5 @@
|
||||
import abc
|
||||
|
||||
|
||||
class TextBlockGenerator(abc.ABC):
|
||||
pass
|
||||
1
test/.gitignore
vendored
@ -1 +1,2 @@
|
||||
/test_data
|
||||
/data
|
||||
|
||||
@ -1,6 +1,13 @@
|
||||
import warnings
|
||||
|
||||
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
||||
|
||||
pytest_plugins = [
|
||||
"test.fixtures.table_parsing",
|
||||
"test.fixtures.figure_detection",
|
||||
"test.fixtures.data",
|
||||
"test.fixtures.formula",
|
||||
"test.fixtures.page_generation.page",
|
||||
]
|
||||
|
||||
|
||||
|
||||
5
test/data.dvc
Normal file
@ -0,0 +1,5 @@
|
||||
outs:
|
||||
- md5: 4e22cc1a7655987683215d4a4677d645.dir
|
||||
size: 25117769
|
||||
nfiles: 6
|
||||
path: data
|
||||
13
test/fixtures/data.py
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
import pytest
|
||||
from dvc.repo import Repo
|
||||
from loguru import logger
|
||||
|
||||
from cv_analysis.locations import REPO_ROOT_PATH, TEST_DATA_DIR_DVC
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def dvc_test_data():
|
||||
# noinspection PyCallingNonCallable
|
||||
logger.info("Pulling data with DVC...")
|
||||
Repo(REPO_ROOT_PATH).pull(targets=[str(TEST_DATA_DIR_DVC)])
|
||||
logger.info("Finished pulling data.")
|
||||
0
test/fixtures/page_generation/__init__.py
vendored
Normal file
252
test/fixtures/page_generation/page.py
vendored
Normal file
@ -0,0 +1,252 @@
|
||||
from typing import Tuple, Iterable, List
|
||||
|
||||
import blend_modes
|
||||
import numpy as np
|
||||
import pytest
|
||||
from PIL import Image, ImageEnhance
|
||||
from PIL.Image import Transpose
|
||||
from funcy import juxt, compose, identity
|
||||
|
||||
from cv_analysis.locations import TEST_PAGE_TEXTURES_DIR
|
||||
from cv_analysis.logging import logger
|
||||
from cv_analysis.utils.conversion import normalize_image_format_to_array, normalize_image_format_to_pil
|
||||
from cv_analysis.utils.image_operations import blur, sharpen, overlay, superimpose
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from synthesis.content_generator import ContentGenerator
|
||||
from synthesis.partitioner.two_column import TwoColumnPagePartitioner
|
||||
from synthesis.randomization import rnd
|
||||
from synthesis.segment.table.table import paste_contents
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
# "rough_grain",
|
||||
# "plain",
|
||||
# "digital",
|
||||
"crumpled",
|
||||
]
|
||||
)
|
||||
def base_texture(request, size):
|
||||
texture = Image.open(TEST_PAGE_TEXTURES_DIR / (request.param + ".jpg"))
|
||||
texture = texture.resize(size)
|
||||
return texture
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
# "portrait",
|
||||
"landscape",
|
||||
]
|
||||
)
|
||||
def orientation(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
# 30,
|
||||
100,
|
||||
]
|
||||
)
|
||||
def dpi(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
# "brown",
|
||||
"sepia",
|
||||
# "gray",
|
||||
# "white",
|
||||
# "light_red",
|
||||
# "light_blue",
|
||||
]
|
||||
)
|
||||
def color_name(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
# "smooth",
|
||||
# "coarse",
|
||||
"neutral",
|
||||
]
|
||||
)
|
||||
def texture_name(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
# 30,
|
||||
70,
|
||||
# 150,
|
||||
]
|
||||
)
|
||||
def color_intensity(request):
|
||||
return request.param
|
||||
|
||||
|
||||
def random_flip(image):
|
||||
if rnd.choice([True, False]):
|
||||
image = image.transpose(Transpose.FLIP_LEFT_RIGHT)
|
||||
if rnd.choice([True, False]):
|
||||
image = image.transpose(Transpose.FLIP_TOP_BOTTOM)
|
||||
return image
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def color(color_name):
|
||||
return {
|
||||
"brown": "#7d6c5b",
|
||||
"sepia": "#b8af88",
|
||||
"gray": "#9c9c9c",
|
||||
"white": "#ffffff",
|
||||
"light_red": "#d68c8b",
|
||||
"light_blue": "#8bd6d6",
|
||||
}[color_name]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def texture_fn(texture_name, size):
|
||||
if texture_name == "smooth":
|
||||
fn = blur
|
||||
elif texture_name == "coarse":
|
||||
fn = compose(overlay, juxt(blur, sharpen))
|
||||
else:
|
||||
fn = identity
|
||||
|
||||
return normalize_image_function(fn)
|
||||
|
||||
|
||||
def normalize_image_function(func):
|
||||
def inner(image):
|
||||
image = normalize_image_format_to_array(image)
|
||||
image = func(image)
|
||||
image = normalize_image_format_to_pil(image)
|
||||
return image
|
||||
|
||||
return inner
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def texture(tinted_blank_page, base_texture):
|
||||
texture = superimpose(base_texture, tinted_blank_page)
|
||||
return texture
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def tinted_blank_page(size, color, color_intensity):
|
||||
tinted_page = Image.new("RGBA", size, color)
|
||||
tinted_page.putalpha(color_intensity)
|
||||
return tinted_page
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def blank_page(size, color, color_intensity):
|
||||
page = Image.new("RGBA", size, color=(255, 255, 255, 0))
|
||||
return page
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def size(dpi, orientation):
|
||||
if orientation == "portrait":
|
||||
size = (8.5 * dpi, 11 * dpi)
|
||||
elif orientation == "landscape":
|
||||
size = (11 * dpi, 8.5 * dpi)
|
||||
else:
|
||||
raise ValueError(f"Unknown orientation: {orientation}")
|
||||
size = tuple(map(int, size))
|
||||
return size
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
TwoColumnPagePartitioner,
|
||||
# RandomPagePartitioner
|
||||
]
|
||||
)
|
||||
def page_partitioner(request):
|
||||
return request.param()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def boxes(page_partitioner, blank_page):
|
||||
boxes = page_partitioner(blank_page)
|
||||
return boxes
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def prepared_texture(texture, texture_fn):
|
||||
texture = random_flip(texture)
|
||||
texture = texture_fn(texture)
|
||||
return texture
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def content_boxes(boxes):
|
||||
content_generator = ContentGenerator()
|
||||
content_boxes = content_generator(boxes)
|
||||
return content_boxes
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def page_with_opaque_content(prepared_texture, content_boxes) -> Tuple[np.ndarray, Iterable[Rectangle]]:
|
||||
page = paste_contents(prepared_texture, content_boxes)
|
||||
|
||||
return page, content_boxes
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def page_with_translucent_content(blank_page, prepared_texture, content_boxes) -> Tuple[np.ndarray, List[Rectangle]]:
|
||||
page_content = paste_contents(blank_page, content_boxes)
|
||||
page = blend_by_multiply(page_content, prepared_texture)
|
||||
|
||||
return page, content_boxes
|
||||
|
||||
|
||||
def blend_by_multiply(page_content, texture):
|
||||
def to_array(image: Image) -> np.ndarray:
|
||||
return np.array(image).astype(np.float32)
|
||||
|
||||
texture.putalpha(255)
|
||||
page_content.putalpha(255)
|
||||
factor = 1.2
|
||||
enhancer = ImageEnhance.Contrast(texture)
|
||||
texture = enhancer.enhance(factor)
|
||||
|
||||
page = blend_modes.multiply(
|
||||
*map(
|
||||
to_array,
|
||||
(
|
||||
page_content,
|
||||
texture,
|
||||
),
|
||||
),
|
||||
opacity=1,
|
||||
).astype(np.uint8)
|
||||
return page
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def random_seeding():
|
||||
from synthesis.segment.plot import pick_colormap
|
||||
|
||||
seed = str(rnd.randint(0, 2**32 - 1))
|
||||
logger.info(f"Random seed: {seed}")
|
||||
rnd.seed(seed)
|
||||
pick_colormap.cache_clear()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def page_with_content(
|
||||
random_seeding,
|
||||
page_with_translucent_content,
|
||||
# page_with_opaque_content,
|
||||
) -> Tuple[np.ndarray, List[Rectangle]]:
|
||||
|
||||
page, boxes = page_with_translucent_content
|
||||
# page, boxes = page_with_opaque_content
|
||||
|
||||
return page, boxes
|
||||
2
test/fixtures/server.py
vendored
@ -6,7 +6,7 @@ import cv2
|
||||
import pytest
|
||||
from funcy import first
|
||||
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
16
test/fixtures/table_parsing.py
vendored
@ -3,14 +3,12 @@ from os.path import join
|
||||
|
||||
import cv2
|
||||
import pytest
|
||||
from dvc.repo import Repo
|
||||
from funcy import first
|
||||
from loguru import logger
|
||||
|
||||
from cv_analysis.config import get_config
|
||||
from cv_analysis.locations import REPO_ROOT_PATH, TEST_DATA_DVC
|
||||
from cv_analysis.utils.draw import draw_rectangles
|
||||
from cv_analysis.utils.open_pdf import open_pdf
|
||||
from cv_analysis.utils.drawing import draw_rectangles
|
||||
from cv_analysis.utils.input import open_analysis_input_file
|
||||
from test.fixtures.figure_detection import paste_text
|
||||
|
||||
CV_CONFIG = get_config()
|
||||
@ -19,15 +17,7 @@ CV_CONFIG = get_config()
|
||||
@pytest.fixture
|
||||
def client_page_with_table(test_file_index, dvc_test_data):
|
||||
img_path = join(CV_CONFIG.test_data_dir, f"test{test_file_index}.png")
|
||||
return first(open_pdf(img_path))
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def dvc_test_data():
|
||||
# noinspection PyCallingNonCallable
|
||||
logger.info("Pulling data with DVC...")
|
||||
Repo(REPO_ROOT_PATH).pull(targets=[str(TEST_DATA_DVC)])
|
||||
logger.info("Finished pulling data.")
|
||||
return first(open_analysis_input_file(img_path))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
19
test/page_generation_test.py
Normal file
@ -0,0 +1,19 @@
|
||||
from typing import Iterable
|
||||
|
||||
from PIL.Image import Image
|
||||
|
||||
from cv_analysis.utils.display import show_image
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
|
||||
|
||||
def test_blank_page(page_with_content):
|
||||
page, boxes = page_with_content
|
||||
|
||||
draw_boxes(page, boxes)
|
||||
|
||||
|
||||
def draw_boxes(page: Image, boxes: Iterable[Rectangle]):
|
||||
from cv_analysis.utils.drawing import draw_rectangles
|
||||
|
||||
page = draw_rectangles(page, boxes, filled=False, annotate=True)
|
||||
show_image(page, backend="pil")
|
||||
@ -3,6 +3,7 @@ from math import prod
|
||||
import cv2
|
||||
import pytest
|
||||
|
||||
from cv_analysis.utils.spacial import area
|
||||
from test.utils.utils import powerset
|
||||
|
||||
|
||||
@ -15,21 +16,20 @@ class TestFindPrimaryTextRegions:
|
||||
|
||||
@pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
|
||||
def test_page_without_text_yields_figures(self, figure_detection_pipeline, page_with_images, image_size):
|
||||
results = figure_detection_pipeline(page_with_images)
|
||||
result_figures_size = map(lambda x: (x.w, x.h), results)
|
||||
result_rectangles = figure_detection_pipeline(page_with_images)
|
||||
result_figure_sizes = map(lambda r: (r.width, r.height), result_rectangles)
|
||||
|
||||
assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figures_size])
|
||||
assert all([image_size[0] < res[0] and image_size[1] < res[1] for res in result_figure_sizes])
|
||||
|
||||
@pytest.mark.parametrize("font_scale", [1, 1.5, 2])
|
||||
@pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
|
||||
@pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
|
||||
@pytest.mark.parametrize("error_tolerance", [0.025])
|
||||
def test_page_with_only_text_yields_no_figures(self, figure_detection_pipeline, page_with_text, error_tolerance):
|
||||
results = figure_detection_pipeline(page_with_text)
|
||||
|
||||
result_figures_area = sum(map(lambda x: (x.w * x.h), results))
|
||||
result_rectangles = figure_detection_pipeline(page_with_text)
|
||||
result_figure_areas = sum(map(area, result_rectangles))
|
||||
page_area = prod(page_with_text.shape)
|
||||
error = result_figures_area / page_area
|
||||
error = result_figure_areas / page_area
|
||||
|
||||
assert error <= error_tolerance
|
||||
|
||||
@ -45,11 +45,11 @@ class TestFindPrimaryTextRegions:
|
||||
image_size,
|
||||
error_tolerance,
|
||||
):
|
||||
results = list(figure_detection_pipeline(page_with_images_and_text))
|
||||
result_rectangles = list(figure_detection_pipeline(page_with_images_and_text))
|
||||
|
||||
result_figures_area = sum(map(lambda x: (x.w * x.h), results))
|
||||
result_figure_areas = sum(map(area, result_rectangles))
|
||||
expected_figure_area = prod(image_size)
|
||||
|
||||
error = abs(result_figures_area - expected_figure_area) / expected_figure_area
|
||||
error = abs(result_figure_areas - expected_figure_area) / expected_figure_area
|
||||
|
||||
assert error <= error_tolerance
|
||||
|
||||
0
test/unit_tests/layout_parsing_test.py
Normal file
@ -3,12 +3,11 @@ import numpy as np
|
||||
import pytest
|
||||
|
||||
from cv_analysis.server.pipeline import table_parsing_formatter, figure_detection_formatter, make_analysis_pipeline
|
||||
from cv_analysis.utils.structures import Rectangle
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
|
||||
|
||||
def analysis_fn_mock(image: np.ndarray):
|
||||
bbox = (0, 0, 42, 42)
|
||||
return [Rectangle.from_xyxy(bbox)]
|
||||
return [Rectangle(0, 0, 42, 42)]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
@ -2,9 +2,12 @@ from itertools import starmap
|
||||
|
||||
import cv2
|
||||
import pytest
|
||||
from funcy import lmap, compose, zipdict
|
||||
|
||||
from cv_analysis.table_parsing import parse_tables
|
||||
from cv_analysis.utils.test_metrics import compute_document_score
|
||||
from cv_analysis.utils import lift
|
||||
from cv_analysis.utils.rectangle import Rectangle
|
||||
from cv_analysis.utils.metrics import compute_document_score
|
||||
|
||||
|
||||
@pytest.mark.parametrize("score_threshold", [0.95])
|
||||
@ -12,8 +15,9 @@ from cv_analysis.utils.test_metrics import compute_document_score
|
||||
def test_table_parsing_on_client_pages(
|
||||
score_threshold, client_page_with_table, expected_table_annotation, test_file_index
|
||||
):
|
||||
result = [x.json_xywh() for x in parse_tables(client_page_with_table)]
|
||||
formatted_result = {"pages": [{"page": str(test_file_index), "cells": result}]}
|
||||
|
||||
results = compose(lift(rectangle_to_dict), parse_tables)(client_page_with_table)
|
||||
formatted_result = {"pages": [{"cells": results}]}
|
||||
|
||||
score = compute_document_score(formatted_result, expected_table_annotation)
|
||||
|
||||
@ -25,6 +29,14 @@ def error_tolerance(line_thickness):
|
||||
return line_thickness * 7
|
||||
|
||||
|
||||
def rectangle_to_dict(rectangle: Rectangle):
|
||||
return zipdict(["x", "y", "width", "height"], rectangle_to_xywh(rectangle))
|
||||
|
||||
|
||||
def rectangle_to_xywh(rectangle: Rectangle):
|
||||
return rectangle.x1, rectangle.y1, abs(rectangle.x1 - rectangle.x2), abs(rectangle.y1 - rectangle.y2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("line_thickness", [1, 2, 3])
|
||||
@pytest.mark.parametrize("line_type", [cv2.LINE_4, cv2.LINE_AA, cv2.LINE_8])
|
||||
@pytest.mark.parametrize("table_style", ["closed horizontal vertical", "open horizontal vertical"])
|
||||
@ -32,7 +44,7 @@ def error_tolerance(line_thickness):
|
||||
@pytest.mark.parametrize("background_color", [255, 220])
|
||||
@pytest.mark.parametrize("table_shape", [(5, 8)])
|
||||
def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with_table, error_tolerance):
|
||||
result = [x.xywh() for x in parse_tables(page_with_table)]
|
||||
result = lmap(rectangle_to_xywh, parse_tables(page_with_table))
|
||||
assert (
|
||||
result == expected_gold_page_with_table
|
||||
or average_error(result, expected_gold_page_with_table) <= error_tolerance
|
||||
@ -46,8 +58,8 @@ def test_table_parsing_on_generic_pages(page_with_table, expected_gold_page_with
|
||||
@pytest.mark.parametrize("background_color", [255, 220])
|
||||
@pytest.mark.parametrize("table_shape", [(5, 8)])
|
||||
@pytest.mark.xfail
|
||||
def test_bad_qual_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
|
||||
result = [x.xywh() for x in parse_tables(page_with_patchy_table)]
|
||||
def test_low_quality_table(page_with_patchy_table, expected_gold_page_with_table, error_tolerance):
|
||||
result = lmap(rectangle_to_xywh, parse_tables(page_with_patchy_table))
|
||||
assert (
|
||||
result == expected_gold_page_with_table
|
||||
or average_error(result, expected_gold_page_with_table) <= error_tolerance
|
||||
|
||||