removed PIL from production code, now only in scripts

This commit is contained in:
Isaac Riley 2022-07-20 16:32:42 +02:00
parent ce9e92876c
commit dbc6d345f0
21 changed files with 113 additions and 330 deletions

View File

@ -17,9 +17,7 @@ from cv_analysis.utils.structures import Rectangle
def make_figure_detection_pipeline(min_area=5000, max_width_to_height_ratio=6): def make_figure_detection_pipeline(min_area=5000, max_width_to_height_ratio=6):
def pipeline(image: np.array): def pipeline(image: np.array):
max_area = image.shape[0] * image.shape[1] * 0.99 max_area = image.shape[0] * image.shape[1] * 0.99
filter_cnts = make_filter_likely_figures( filter_cnts = make_filter_likely_figures(min_area, max_area, max_width_to_height_ratio)
min_area, max_area, max_width_to_height_ratio
)
image = remove_primary_text_regions(image) image = remove_primary_text_regions(image)
cnts = detect_large_coherent_structures(image) cnts = detect_large_coherent_structures(image)

View File

@ -5,10 +5,6 @@ from operator import __and__
import cv2 import cv2
import numpy as np import numpy as np
# from pdf2image import pdf2image
# from cv_analysis.utils.display import show_mpl
# from cv_analysis.utils.draw import draw_rectangles
from cv_analysis.utils.structures import Rectangle from cv_analysis.utils.structures import Rectangle
from cv_analysis.utils.post_processing import ( from cv_analysis.utils.post_processing import (
remove_overlapping, remove_overlapping,
@ -23,9 +19,7 @@ def is_likely_segment(rect, min_area=100):
def find_segments(image): def find_segments(image):
contours, hierarchies = cv2.findContours( contours, hierarchies = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
mask1 = map(is_likely_segment, contours) mask1 = map(is_likely_segment, contours)
mask2 = map(has_no_parent, hierarchies[0]) mask2 = map(has_no_parent, hierarchies[0])
@ -81,21 +75,3 @@ def parse_layout(image: np.array):
rects = remove_overlapping(rects) rects = remove_overlapping(rects)
return list(map(Rectangle.from_xywh, rects)) return list(map(Rectangle.from_xywh, rects))
# def annotate_layout_in_pdf(page, return_rects=False, show=False):
# #page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
# #page = np.array(page)
# rects = parse_layout(page)
# if return_rects:
# return rects, page
# elif show:
# page = draw_rectangles(page, rects)
# vizlogger.debug(page, "layout10_output.png")
# show_mpl(page)
# else:
# page = draw_rectangles(page, rects)
# return page

View File

@ -5,16 +5,12 @@ import numpy as np
import pdf2image import pdf2image
from iteration_utilities import starfilter, first from iteration_utilities import starfilter, first
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.draw import draw_contours
from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy from cv_analysis.utils.filters import is_large_enough, is_filled, is_boxy
from cv_analysis.utils.visual_logging import vizlogger from cv_analysis.utils.visual_logging import vizlogger
def is_likely_redaction(contour, hierarchy, min_area): def is_likely_redaction(contour, hierarchy, min_area):
return ( return is_filled(hierarchy) and is_boxy(contour) and is_large_enough(contour, min_area)
is_filled(hierarchy) and is_boxy(contour) and is_large_enough(contour, min_area)
)
def find_redactions(image: np.array, min_normalized_area=200000): def find_redactions(image: np.array, min_normalized_area=200000):
@ -31,9 +27,7 @@ def find_redactions(image: np.array, min_normalized_area=200000):
thresh = cv2.threshold(blurred, 252, 255, cv2.THRESH_BINARY)[1] thresh = cv2.threshold(blurred, 252, 255, cv2.THRESH_BINARY)[1]
vizlogger.debug(blurred, "redactions04_threshold.png") vizlogger.debug(blurred, "redactions04_threshold.png")
contours, hierarchies = cv2.findContours( contours, hierarchies = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
)
try: try:
contours = map( contours = map(
@ -46,16 +40,3 @@ def find_redactions(image: np.array, min_normalized_area=200000):
return list(contours) return list(contours)
except: except:
return [] return []
# def annotate_redactions_in_pdf(page, show=False):
# #page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
# #page = np.array(page)
# redaction_contours = find_redactions(page)
# page = draw_contours(page, redaction_contours)
# vizlogger.debug(page, "redactions05_output.png")
# if show:
# show_mpl(page)

View File

@ -7,7 +7,7 @@ from pyinfra.server.utils import make_streamable_and_wrap_in_packing_logic
from cv_analysis.server.format import make_formatter from cv_analysis.server.format import make_formatter
from cv_analysis.utils.logging import get_logger from cv_analysis.utils.logging import get_logger
from cv_analysis.utils.preprocessing import open_img_from_bytes from cv_analysis.utils.preprocessing import open_img
logger = get_logger() logger = get_logger()
@ -26,7 +26,7 @@ def make_streamable_analysis_fn(analysis_fn: Callable):
def analyse(data: bytes, metadata: dict): def analyse(data: bytes, metadata: dict):
image = open_img_from_bytes(gzip.decompress(data)) image = open_img(gzip.decompress(data))[0]
dpi = metadata["image_info"]["dpi"] dpi = metadata["image_info"]["dpi"]
width, height, rotation = itemgetter("width", "height", "rotation")(metadata["page_info"]) width, height, rotation = itemgetter("width", "height", "rotation")(metadata["page_info"])

View File

@ -15,9 +15,7 @@ from cv_analysis.layout_parsing import parse_layout
def add_external_contours(image, image_h_w_lines_only): def add_external_contours(image, image_h_w_lines_only):
contours, _ = cv2.findContours( contours, _ = cv2.findContours(image_h_w_lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
image_h_w_lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)
for cnt in contours: for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt) x, y, w, h = cv2.boundingRect(cnt)
cv2.rectangle(image, (x, y), (x + w, y + h), 255, 1) cv2.rectangle(image, (x, y), (x + w, y + h), 255, 1)
@ -82,9 +80,7 @@ def isolate_vertical_and_horizontal_components(img_bin):
img_bin_extended = img_bin_h | img_bin_v img_bin_extended = img_bin_h | img_bin_v
th1, img_bin_extended = cv2.threshold(img_bin_extended, 120, 255, cv2.THRESH_BINARY) th1, img_bin_extended = cv2.threshold(img_bin_extended, 120, 255, cv2.THRESH_BINARY)
img_bin_final = cv2.dilate( img_bin_final = cv2.dilate(img_bin_extended, np.ones((1, 1), np.uint8), iterations=1)
img_bin_extended, np.ones((1, 1), np.uint8), iterations=1
)
# add contours before lines are extended by blurring # add contours before lines are extended by blurring
img_bin_final = add_external_contours(img_bin_final, img_lines_raw) img_bin_final = add_external_contours(img_bin_final, img_lines_raw)
@ -137,9 +133,7 @@ def turn_connected_components_into_rects(image):
x1, y1, w, h, area = stat x1, y1, w, h, area = stat
return area > 2000 and w > 35 and h > 25 return area > 2000 and w > 35 and h > 25
_, _, stats, _ = cv2.connectedComponentsWithStats( _, _, stats, _ = cv2.connectedComponentsWithStats(~image, connectivity=8, ltype=cv2.CV_32S)
~image, connectivity=8, ltype=cv2.CV_32S
)
stats = np.vstack(list(filter(is_large_enough, stats))) stats = np.vstack(list(filter(is_large_enough, stats)))
return stats[:, :-1][2:] return stats[:, :-1][2:]
@ -149,7 +143,7 @@ def parse_tables(image: np.array, show=False):
"""Runs the full table parsing process. """Runs the full table parsing process.
Args: Args:
image (np.array): single PDF page, opened as PIL.Image object and converted to a numpy array image (np.array): single PDF page, converted to a numpy array
Returns: Returns:
list: list of rectangles corresponding to table cells list: list of rectangles corresponding to table cells

View File

@ -1,87 +0,0 @@
import numpy as np
from scipy.ndimage import rotate as rotate_
import cv2
from cv_analysis.config import CONFIG
def rotate_straight(im: np.array, skew_angle: int) -> np.array:
    """Rotate ``im`` by ``skew_angle`` around its centre, keeping the canvas size.

    Args:
        im: Image array; only its first two dimensions (height, width) are read.
        skew_angle: Angle handed to ``cv2.getRotationMatrix2D`` (scale fixed at 1.0).

    Returns:
        The warped image with the same width and height as the input.
    """
    height, width = im.shape[:2]
    rotation_matrix = cv2.getRotationMatrix2D((width // 2, height // 2), skew_angle, 1.0)
    # Cubic interpolation; pixels outside the source are filled by replicating the border.
    return cv2.warpAffine(
        im,
        rotation_matrix,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
def find_score(arr, angle):
    """Score how well ``arr`` rotated by ``angle`` aligns with the horizontal axis.

    The array is rotated, summed row by row, and the squared differences
    between neighbouring row sums are added up.

    Args:
        arr: Array to rotate.
        angle: Rotation angle handed to ``scipy.ndimage.rotate``.

    Returns:
        Sum of squared differences between consecutive row sums.
    """
    rotated = rotate_(arr, angle, reshape=False, order=0, mode=CONFIG.deskew.mode)
    row_sums = np.sum(rotated, axis=1)
    neighbour_deltas = np.diff(row_sums)
    return np.sum(neighbour_deltas ** 2)
def find_best_angle(page):
    """Return the candidate angle with the highest ``find_score`` for ``page``.

    Candidates come from ``np.arange`` over the range
    ``[-max_abs_angle, max_abs_angle + delta)`` in steps of ``delta``, both
    read from ``CONFIG.deskew``. On ties the first candidate wins.
    """
    max_abs_angle = CONFIG.deskew.max_abs_angle
    step = CONFIG.deskew.delta
    candidates = np.arange(-max_abs_angle, max_abs_angle + step, step)
    scores = [find_score(page, candidate) for candidate in candidates]
    # max() keeps the first maximal element, matching list.index(max(...)).
    best_index = max(range(len(scores)), key=scores.__getitem__)
    return candidates[best_index]
def preprocess(arr: np.array):
    """Convert ``arr`` to grayscale if it has more than two dimensions, then denoise it.

    Denoising strength is read from ``CONFIG.deskew.filter_strength_h``.
    """
    has_channel_axis = len(arr.shape) > 2
    if has_channel_axis:
        arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)
    return cv2.fastNlMeansDenoising(arr, h=CONFIG.deskew.filter_strength_h)
def rotate(page, angle):
    """Rotate ``page`` by ``angle`` without changing its shape.

    Uses order-0 interpolation and ``"nearest"`` boundary handling.
    """
    return rotate_(page, angle, reshape=False, order=0, mode="nearest")
def deskew_histbased(page: np.array):
    """Deskew ``page`` using the row-histogram score.

    The page is preprocessed, the best angle is searched and rounded to three
    decimals, and the preprocessed page is rotated by that angle.

    Returns:
        Tuple ``(rotated_page, best_angle)``. Note that the rotated page is the
        preprocessed version of the input, not the raw input.
    """
    prepared = preprocess(page)
    best_angle = round(find_best_angle(prepared), 3)
    if CONFIG.deskew.verbose:
        print("Skew angle from pixel histogram: {}".format(best_angle))
    return (rotate(prepared, best_angle), best_angle)
def needs_deskew(page: np.array) -> bool:
    """Decide whether ``page`` looks skewed.

    The score is the mean absolute difference between the row-wise means of
    the right half and the left half of the page. It is computed for the page
    as given and for the page rotated by ``-CONFIG.deskew.test_delta`` and
    ``+CONFIG.deskew.test_delta``.

    Returns:
        True when the unrotated score is larger than the smaller of the two
        rotated scores.
    """

    def half_mean_gap(img):
        # Split at the horizontal midpoint and compare per-row means of both halves.
        midpoint = int(img.shape[1] / 2)
        left_row_means = np.mean(img[:, :midpoint], axis=1)
        right_row_means = np.mean(img[:, midpoint:], axis=1)
        return np.mean(np.abs(right_row_means - left_row_means))

    unrotated_score = half_mean_gap(page)
    scores = []
    for angle in (-CONFIG.deskew.test_delta, CONFIG.deskew.test_delta):
        scores.append(half_mean_gap(rotate(page, angle)))
    print(unrotated_score, scores)
    return unrotated_score > min(scores)
# Select the module-level ``deskew`` callable from CONFIG.deskew.function.
# Both variants return a ``(page, angle)`` tuple.
if CONFIG.deskew.function == "hist":

    def deskew(page):
        """Deskew via the histogram method if needed, else return ``(page, 0)``."""
        return deskew_histbased(page) if needs_deskew(page) else (page, 0)

elif CONFIG.deskew.function == "identity":

    def deskew(page):
        """Return the page untouched; the angle slot is ``None``."""
        return (page, None)

else:
    # The f-prefix is required so the offending value is interpolated into the message.
    raise ValueError(f"'{CONFIG.deskew.function}' is not a valid parameter value for CONFIG.deskew.function")

View File

@ -1,26 +1,34 @@
from numpy import resize
import cv2 import cv2
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
def show_mpl(image): def show_image_cv2(image, maxdim=700):
h, w, c = image.shape
maxhw = max(h, w)
if maxhw > maxdim:
ratio = maxdim / maxhw
h = int(h * ratio)
w = int(w * ratio)
img = cv2.resize(image, (h, w))
cv2.imshow("", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def show_image_mpl(image):
fig, ax = plt.subplots(1, 1) fig, ax = plt.subplots(1, 1)
fig.set_size_inches(20, 20) fig.set_size_inches(20, 20)
ax.imshow(image, cmap="gray") ax.imshow(image, cmap="gray")
plt.show() plt.show()
def save_mpl(image, path): def show_image(image, backend="m"):
# fig, ax = plt.subplots(1, 1) if backend.startswith("m"):
# figure = plt.gcf() show_image_mpl(image)
# figure.set_size_inches(16,12) else:
fig, ax = plt.subplots(1, 1) show_image_cv2(image)
fig.set_size_inches(20, 20)
ax.imshow(image, cmap="gray")
# plt.close()
plt.savefig(path)
plt.close()
def show_cv2(image): def save_image(image, path):
cv2.imshow("", image) cv2.imwrite(path, image)
cv2.waitKey(0)

View File

@ -8,9 +8,7 @@ from cv_analysis.config import CONFIG
def make_logger_getter(): def make_logger_getter():
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.getLevelName(CONFIG.service.logging_level)) logger.setLevel(logging.getLevelName(CONFIG.service.logging_level))
formatter = logging.Formatter( formatter = logging.Formatter(fmt="%(asctime)s %(levelname)s: %(message)s", datefmt="%d.%m.%Y - %H:%M:%S")
fmt="%(asctime)s %(levelname)s: %(message)s", datefmt="%d.%m.%Y - %H:%M:%S"
)
ch = logging.StreamHandler(sys.stdout) ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.getLevelName(CONFIG.service.logging_level)) ch.setLevel(logging.getLevelName(CONFIG.service.logging_level))

View File

@ -0,0 +1,27 @@
from numpy import array, ndarray
import pdf2image
from PIL import Image
from cv_analysis.utils.preprocessing import preprocess_page_array
def open_pdf(pdf, first_page=0, last_page=None):
    """Load pages from a PDF or image source as preprocessed arrays.

    Args:
        pdf: One of
            * ``str`` - path ending in ``.png``/``.jpg``/``.jpeg`` (opened with
              PIL as a single page) or ``.pdf`` (rendered with ``pdf2image``);
            * ``bytes`` - passed to ``pdf2image.convert_from_bytes``;
            * ``list`` or ``numpy.ndarray`` - returned unchanged.
        first_page: Page index; incremented by one before being handed to
            ``pdf2image``. Ignored for image paths.
        last_page: Page index, incremented by one before being handed to
            ``pdf2image``; ``None`` is passed through as ``None``.

    Returns:
        A list with ``preprocess_page_array`` applied to every loaded page, or
        the input itself when it is already a list or array.

    Raises:
        IOError: If a path has none of the accepted extensions.
        TypeError: If ``pdf`` is not a str, bytes, list or numpy.ndarray.
    """
    # Already-loaded data needs no page arithmetic or conversion.
    if isinstance(pdf, (list, ndarray)):
        return pdf

    first_page += 1
    last_page = None if last_page is None else last_page + 1

    if isinstance(pdf, str):
        lowered = pdf.lower()
        if lowered.endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(pdf)]
        elif lowered.endswith(".pdf"):
            pages = pdf2image.convert_from_path(pdf, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes:\n\t.png\n\t.jpg\n\t.jpeg\n\t.pdf")
    elif isinstance(pdf, bytes):
        pages = pdf2image.convert_from_bytes(pdf, first_page=first_page, last_page=last_page)
    else:
        # Previously this fell through and crashed with UnboundLocalError on `pages`.
        raise TypeError(
            f"Unsupported input type {type(pdf).__name__!r}; expected str, bytes, list or numpy.ndarray."
        )

    return [preprocess_page_array(array(p)) for p in pages]

View File

@ -18,21 +18,11 @@ def remove_overlapping(rectangles):
def remove_included(rectangles): def remove_included(rectangles):
def included(a, b): def included(a, b):
return ( return b.xmin >= a.xmin and b.ymin >= a.ymin and b.xmax <= a.xmax and b.ymax <= a.ymax
b.xmin >= a.xmin
and b.ymin >= a.ymin
and b.xmax <= a.xmax
and b.ymax <= a.ymax
)
def includes(a, b, tol=3): def includes(a, b, tol=3):
"""does a include b?""" """does a include b?"""
return ( return b.xmin + tol >= a.xmin and b.ymin + tol >= a.ymin and b.xmax - tol <= a.xmax and b.ymax - tol <= a.ymax
b.xmin + tol >= a.xmin
and b.ymin + tol >= a.ymin
and b.xmax - tol <= a.xmax
and b.ymax - tol <= a.ymax
)
def is_not_included(rect, rectangles): def is_not_included(rect, rectangles):
return not any(includes(r2, rect) for r2 in rectangles if not rect == r2) return not any(includes(r2, rect) for r2 in rectangles if not rect == r2)
@ -110,9 +100,7 @@ def __remove_isolated_sorted(rectangles):
def remove_isolated(rectangles, input_sorted=False): def remove_isolated(rectangles, input_sorted=False):
return (__remove_isolated_sorted if input_sorted else __remove_isolated_unsorted)( return (__remove_isolated_sorted if input_sorted else __remove_isolated_unsorted)(rectangles)
rectangles
)
Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax") Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax")

View File

@ -1,41 +1,29 @@
from io import BytesIO from numpy import frombuffer, ndarray
from numpy import array, ndarray
import pdf2image
from PIL import Image
import cv2 import cv2
def preprocess_pdf_image(page): def preprocess_page_array(page):
if len(page.shape) > 2: if len(page.shape) > 2:
page = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY) page = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
page = cv2.fastNlMeansDenoising(page, h=3) page = cv2.fastNlMeansDenoising(page, h=3)
return page return page
def open_pdf(pdf, first_page=0, last_page=None): def page2image(page):
first_page += 1 if type(page) == bytes:
last_page = None if last_page is None else last_page + 1 page = frombuffer(page)
elif type(page) == ndarray:
if type(pdf) == str: page = page
if pdf.lower().endswith((".png", ".jpg", ".jpeg")): elif type(page) == str:
pages = [Image.open(pdf)] if page.lower().endswith((".png", ".jpg", ".jpeg")):
else: # assume pdf as default file type for a path argument page = cv2.imread(page)
pages = pdf2image.convert_from_path( else:
pdf, first_page=first_page, last_page=last_page raise IOError(
"PDFs are not a valid input type for cv-analysis."
" Use PNGs for tests and NumPy arrays for deployment."
) )
elif type(pdf) == bytes: else:
pages = pdf2image.convert_from_bytes( raise TypeError("Incompatible datatype. Expected bytes, numpy.ndarray, or path to an image file.")
pdf, first_page=first_page, last_page=last_page
)
elif type(pdf) in {list, ndarray}:
return pdf
pages = [preprocess_pdf_image(array(p)) for p in pages] return preprocess_page_array(page)
return pages
def open_img_from_bytes(bytes_obj: bytes):
page = Image.open(BytesIO(bytes_obj))
return preprocess_pdf_image(array(page))

View File

@ -75,11 +75,7 @@ def compute_document_score(results_dict, annotation_dict):
scores = [] scores = []
for i in range(len(annotation_dict["pages"])): for i in range(len(annotation_dict["pages"])):
scores.append( scores.append(compute_page_iou(results_dict["pages"][i]["cells"], annotation_dict["pages"][i]["cells"]))
compute_page_iou(
results_dict["pages"][i]["cells"], annotation_dict["pages"][i]["cells"]
)
)
scores = np.array(scores) scores = np.array(scores)
doc_score = np.average(scores, weights=page_weights) doc_score = np.average(scores, weights=page_weights)

View File

@ -1,6 +1,6 @@
import os import os
from cv_analysis.config import CONFIG from cv_analysis.config import CONFIG
from cv_analysis.utils.display import save_mpl from cv_analysis.utils.display import save_image
class VisualLogger: class VisualLogger:
@ -12,7 +12,7 @@ class VisualLogger:
def _save(self, img, name): def _save(self, img, name):
output_path = os.path.join(self.output_folder, name) output_path = os.path.join(self.output_folder, name)
save_mpl(img, output_path) save_image(img, output_path)
def info(self, img, name): def info(self, img, name):
if self._level_is_info(): if self._level_is_info():
@ -36,6 +36,4 @@ class VisualLogger:
return self.level == "ALL" return self.level == "ALL"
vizlogger = VisualLogger( vizlogger = VisualLogger(CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder)
CONFIG.visual_logging.level, CONFIG.visual_logging.output_folder
)

View File

@ -8,9 +8,9 @@ python scripts/annotate.py /home/iriley/Documents/pdf/scanned/10.pdf 5 --type fi
import argparse import argparse
from cv_analysis.utils.display import show_mpl from cv_analysis.utils.display import show_image
from cv_analysis.utils.draw import draw_contours, draw_rectangles from cv_analysis.utils.draw import draw_contours, draw_rectangles
from cv_analysis.utils.preprocessing import open_pdf from cv_analysis.utils.open_pdf import open_pdf
from cv_analysis.utils.visual_logging import vizlogger from cv_analysis.utils.visual_logging import vizlogger
@ -28,7 +28,7 @@ def annotate_page(page_image, analysis_function, drawing_function, name="tmp.png
result = analysis_function(page_image) result = analysis_function(page_image)
page_image = drawing_function(page_image, result) page_image = drawing_function(page_image, result)
vizlogger.debug(page_image, "redactions05_output.png") vizlogger.debug(page_image, "redactions05_output.png")
show_mpl(page_image) show_image(page_image)
if __name__ == "__main__": if __name__ == "__main__":
@ -46,5 +46,6 @@ if __name__ == "__main__":
from cv_analysis.layout_parsing import parse_layout as analyze from cv_analysis.layout_parsing import parse_layout as analyze
elif args.type == "figure": elif args.type == "figure":
from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline from cv_analysis.figure_detection.figure_detection_pipeline import make_figure_detection_pipeline
analyze = make_figure_detection_pipeline() analyze = make_figure_detection_pipeline()
annotate_page(page, analyze, draw, name=name, show=args.show) annotate_page(page, analyze, draw, name=name, show=args.show)

View File

@ -1,50 +0,0 @@
# sample usage: python3 scripts/deskew_demo.py /path/to/crooked.pdf 0
import argparse
import numpy as np
import pdf2image
from PIL import Image
from cv_analysis.utils.deskew import deskew_histbased # , deskew_linebased
from cv_analysis.utils.display import show_mpl
from cv_analysis.utils.draw import draw_stats
from cv_analysis.table_parsing import parse_tables
def parse_args():
    """Parse the command line of the deskew demo.

    Returns:
        Namespace with ``pdf_path`` (str), ``page_index`` (int) and the
        optional ``save_path`` (str or None).
    """
    parser = argparse.ArgumentParser()
    argument_specs = (
        (("pdf_path",), {}),
        (("page_index",), {"type": int}),
        (("--save_path",), {}),
    )
    for names, options in argument_specs:
        parser.add_argument(*names, **options)
    return parser.parse_args()
if __name__ == "__main__":
    args = parse_args()

    # Render exactly one page: both bounds are page_index + 1.
    page = pdf2image.convert_from_path(args.pdf_path, first_page=args.page_index + 1, last_page=args.page_index + 1)[0]
    page = np.array(page)
    show_mpl(page)

    # deskew_histbased accepts only the page; passing verbose=True raised a TypeError.
    # Verbosity is read from CONFIG.deskew.verbose inside the function.
    page_corr, _ = deskew_histbased(page)
    show_mpl(page_corr)

    if args.save_path:
        page_ = Image.fromarray(page).convert("RGB")
        page_.save(args.save_path.replace(".pdf", "_uncorrected.pdf"))
        page_corr_ = Image.fromarray(page_corr).convert("RGB")
        page_corr_.save(args.save_path.replace(".pdf", "_corrected.pdf"))

    # Run table parsing on both versions so the annotated results can be compared.
    stats = parse_tables(page)
    page = draw_stats(page, stats)
    show_mpl(page)

    stats_corr = parse_tables(page_corr)
    page_corr = draw_stats(page_corr, stats_corr)
    show_mpl(page_corr)

    if args.save_path:
        page = Image.fromarray(page).convert("RGB")
        page.save(args.save_path.replace(".pdf", "_uncorrected_annotated.pdf"))
        page_corr = Image.fromarray(page_corr).convert("RGB")
        page_corr.save(args.save_path.replace(".pdf", "_corrected_annotated.pdf"))

View File

@ -1,16 +1,11 @@
import argparse import argparse
import base64
import gzip import gzip
import io
import json
from operator import itemgetter from operator import itemgetter
from typing import List from typing import List
import fitz import fitz
import pdf2image import pdf2image
from PIL import Image
from funcy import lmap, compose, pluck from funcy import lmap, compose, pluck
from funcy import lpluck
from pyinfra.default_objects import get_component_factory from pyinfra.default_objects import get_component_factory
@ -45,13 +40,13 @@ def draw_cells_on_page(cells: List[dict], page):
def annotate_results_on_pdf(results, pdf_path, result_path): def annotate_results_on_pdf(results, pdf_path, result_path):
open_pdf = fitz.open(pdf_path) opened_pdf = fitz.open(pdf_path)
metadata_per_page = pluck("metadata", results) metadata_per_page = pluck("metadata", results)
for page, metadata in zip(open_pdf, metadata_per_page): for page, metadata in zip(opened_pdf, metadata_per_page):
if metadata: if metadata:
draw_cells_on_page(metadata["cells"], page) draw_cells_on_page(metadata["cells"], page)
open_pdf.save(result_path) opened_pdf.save(result_path)
def main(args): def main(args):

View File

@ -3,18 +3,17 @@ import textwrap
import cv2 import cv2
import numpy as np import numpy as np
import pytest import pytest
from PIL import Image
from lorem_text import lorem from lorem_text import lorem
from funcy import first from funcy import first
from cv_analysis.figure_detection.figure_detection_pipeline import ( from cv_analysis.figure_detection.figure_detection_pipeline import (
make_figure_detection_pipeline, make_figure_detection_pipeline,
) )
from cv_analysis.utils.display import show_mpl from cv_analysis.utils.display import show_image
@pytest.fixture @pytest.fixture
def page_with_images(random_image, n_images, background): def page_with_images(random_image, n_images, background):
page_image = Image.fromarray(background.astype("uint8")).convert("RGB") # page_image = Image.fromarray(background.astype("uint8")).convert("RGB")
page_image = paste_image(page_image, random_image, (200, 200)) page_image = paste_image(page_image, random_image, (200, 200))
if n_images == 2: if n_images == 2:
page_image = paste_image(page_image, random_image, (1000, 2600)) page_image = paste_image(page_image, random_image, (1000, 2600))
@ -32,14 +31,10 @@ def page_with_text(background, font_scale, font_style, text_types):
cursor = (image.shape[1] // 2, 70) cursor = (image.shape[1] // 2, 70)
image = paste_text(image, cursor, font_scale, font_style, y_stop=body_height) image = paste_text(image, cursor, font_scale, font_style, y_stop=body_height)
cursor = (50, body_height + 70) cursor = (50, body_height + 70)
image = paste_text( image = paste_text(image, cursor, font_scale, font_style, y_stop=body_height * 2)
image, cursor, font_scale, font_style, y_stop=body_height * 2
)
if "caption" in text_types: if "caption" in text_types:
cursor = (image.shape[1] // 2, image.shape[0] - 100) cursor = (image.shape[1] // 2, image.shape[0] - 100)
image = paste_text( image = paste_text(image, cursor, font_scale, font_style, y_stop=body_height * 3)
image, cursor, font_scale, font_style, y_stop=body_height * 3
)
return image return image
@ -67,9 +62,7 @@ def paste_text(image: np.ndarray, cursor, font_scale, font_style, y_stop):
def paste_text_at_cursor(x_start, y_start, y_stop): def paste_text_at_cursor(x_start, y_start, y_stop):
# TODO: adjust incorrect right margin # TODO: adjust incorrect right margin
text = lorem.paragraphs(1) * 200 text = lorem.paragraphs(1) * 200
(dx, dy), base = cv2.getTextSize( (dx, dy), base = cv2.getTextSize(text, fontFace=font_style, fontScale=font_scale, thickness=1)
text, fontFace=font_style, fontScale=font_scale, thickness=1
)
dy += base dy += base
# char_width = dx // len(text) # char_width = dx // len(text)
text = textwrap.fill(text=text, width=(dx // page_width)) text = textwrap.fill(text=text, width=(dx // page_width))
@ -95,6 +88,7 @@ def paste_text(image: np.ndarray, cursor, font_scale, font_style, y_stop):
def paste_image(page_image, image, coords): def paste_image(page_image, image, coords):
image = Image.fromarray(image.astype("uint8")).convert("RGBA") h, w = image.shape[:2]
page_image.paste(image, coords) x, y = coords
page_image[x : x + h, y : y + w] = image
return page_image return page_image

View File

@ -2,8 +2,8 @@ import gzip
import io import io
import numpy as np import numpy as np
import cv2
import pytest import pytest
from PIL import Image
from funcy import first from funcy import first
from cv_analysis.utils.structures import Rectangle from cv_analysis.utils.structures import Rectangle
@ -12,7 +12,7 @@ from incl.pyinfra.pyinfra.server.packing import bytes_to_string
@pytest.fixture @pytest.fixture
def random_image_as_bytes_and_compressed(random_image): def random_image_as_bytes_and_compressed(random_image):
image = Image.fromarray(random_image.astype("uint8")).convert("RGBA") image = cv2.cvtColor(random_image.astype("uint8"), cv2.COLOR_RGB2RGBA)
img_byte_arr = io.BytesIO() img_byte_arr = io.BytesIO()
image.save(img_byte_arr, format="PNG") image.save(img_byte_arr, format="PNG")
return gzip.compress(img_byte_arr.getvalue()) return gzip.compress(img_byte_arr.getvalue())

View File

@ -6,7 +6,7 @@ from funcy import first
from cv_analysis.locations import TEST_DATA_DIR from cv_analysis.locations import TEST_DATA_DIR
from cv_analysis.utils.draw import draw_rectangles from cv_analysis.utils.draw import draw_rectangles
from cv_analysis.utils.preprocessing import open_pdf from cv_analysis.utils.open_pdf import open_pdf
from test.fixtures.figure_detection import paste_text from test.fixtures.figure_detection import paste_text
@ -24,9 +24,7 @@ def expected_table_annotation(test_file_index):
@pytest.fixture @pytest.fixture
def page_with_table( def page_with_table(background, table_shape, table_style, n_tables, line_thickness, line_type):
background, table_shape, table_style, n_tables, line_thickness, line_type
):
page = draw_table( page = draw_table(
background, background,
(100, 100), (100, 100),
@ -36,9 +34,7 @@ def page_with_table(
line_type=line_type, line_type=line_type,
) )
if n_tables == 2: if n_tables == 2:
page = draw_table( page = draw_table(page, (200, 2000), table_shape, table_style, line_thickness, line_type)
page, (200, 2000), table_shape, table_style, line_thickness, line_type
)
return page return page
@ -205,9 +201,7 @@ def expected_gold_page_with_table(page_with_table, n_tables):
return result return result
def draw_table( def draw_table(page, table_position, table_shape, table_style, line_thickness, line_type):
page, table_position, table_shape, table_style, line_thickness, line_type
):
bbox_table = (*table_position, 1500, 1000) bbox_table = (*table_position, 1500, 1000)
page = draw_grid_lines( page = draw_grid_lines(
page, page,

View File

@ -6,7 +6,7 @@ from cv_analysis.figure_detection.text import (
remove_primary_text_regions, remove_primary_text_regions,
apply_threshold_to_image, apply_threshold_to_image,
) )
from cv_analysis.utils.display import show_mpl from cv_analysis.utils.display import show_image
from test.utils.utils import powerset from test.utils.utils import powerset
@ -25,33 +25,19 @@ class TestFindPrimaryTextRegions:
np.testing.assert_equal(result_page, apply_threshold_to_image(page_with_images)) np.testing.assert_equal(result_page, apply_threshold_to_image(page_with_images))
@pytest.mark.parametrize("font_scale", [1, 1.5, 2]) @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
@pytest.mark.parametrize( @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
"font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX]
)
@pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"])) @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
def test_page_with_only_text_gets_text_removed( def test_page_with_only_text_gets_text_removed(self, page_with_text, error_tolerance):
self, page_with_text, error_tolerance
):
result_page = remove_primary_text_regions(page_with_text) result_page = remove_primary_text_regions(page_with_text)
relative_error = ( relative_error = np.sum(result_page != apply_threshold_to_image(page_with_text)) / result_page.size
np.sum(result_page != apply_threshold_to_image(page_with_text))
/ result_page.size
)
assert relative_error <= error_tolerance assert relative_error <= error_tolerance
@pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
@pytest.mark.parametrize("n_images", [1, 2]) @pytest.mark.parametrize("n_images", [1, 2])
@pytest.mark.parametrize("font_scale", [1, 1.5, 2]) @pytest.mark.parametrize("font_scale", [1, 1.5, 2])
@pytest.mark.parametrize( @pytest.mark.parametrize("font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX])
"font_style", [cv2.FONT_HERSHEY_SIMPLEX, cv2.FONT_HERSHEY_COMPLEX]
)
@pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"])) @pytest.mark.parametrize("text_types", powerset(["body", "header", "caption"]))
def test_page_with_images_and_text_keeps_images( def test_page_with_images_and_text_keeps_images(self, page_with_images_and_text, error_tolerance):
self, page_with_images_and_text, error_tolerance
):
result_page = remove_primary_text_regions(page_with_images_and_text) result_page = remove_primary_text_regions(page_with_images_and_text)
relative_error = ( relative_error = np.sum(result_page != apply_threshold_to_image(page_with_images_and_text)) / result_page.size
np.sum(result_page != apply_threshold_to_image(page_with_images_and_text))
/ result_page.size
)
assert relative_error <= error_tolerance assert relative_error <= error_tolerance

View File

@ -6,9 +6,7 @@ from cv_analysis.server.stream import make_streamable_analysis_fn
@pytest.mark.parametrize("operation", ["mock"]) @pytest.mark.parametrize("operation", ["mock"])
@pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)]) @pytest.mark.parametrize("image_size", [(200, 200), (500, 500), (800, 800)])
def test_make_analysis_fn( def test_make_analysis_fn(analysis_fn_mock, random_image_metadata_package, expected_analyse_metadata):
analysis_fn_mock, random_image_metadata_package, expected_analyse_metadata
):
analyse = make_streamable_analysis_fn(analysis_fn_mock) analyse = make_streamable_analysis_fn(analysis_fn_mock)
results = first(analyse(random_image_metadata_package)) results = first(analyse(random_image_metadata_package))