82 lines
2.5 KiB
Python

import numpy as np
from scipy.ndimage import rotate as rotate_
import cv2
from cv_analysis.config import CONFIG
def rotate_straight(im: np.array, skew_angle: int) -> np.array:
    """Rotate an image about its centre while keeping the canvas size.

    Args:
        im: Image array; only the first two entries of its shape
            (height, width) are read here.
        skew_angle: Rotation angle, forwarded unchanged to
            ``cv2.getRotationMatrix2D`` (which interprets it in degrees).

    Returns:
        The output of ``cv2.warpAffine`` for an output size of
        (width, height), using bicubic interpolation and replicated
        border pixels.
    """
    height, width = im.shape[:2]
    # Integer division: the pivot is the (floored) centre pixel.
    pivot = (width // 2, height // 2)
    affine = cv2.getRotationMatrix2D(pivot, skew_angle, 1.0)
    return cv2.warpAffine(
        im,
        affine,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
def find_score(arr, angle):
    """Score how abruptly the row sums change after rotating ``arr``.

    The array is rotated by ``angle`` (same output shape, order-0
    interpolation, boundary mode taken from ``CONFIG.deskew.mode``),
    summed along axis 1, and the squared differences between
    consecutive entries of that profile are added up.

    Args:
        arr: Array to rotate; must have at least two dimensions because
            the profile is taken along axis 1.
        angle: Rotation angle passed to ``scipy.ndimage.rotate``.

    Returns:
        Sum of squared first differences of the axis-1 sum profile.
    """
    turned = rotate_(arr, angle, reshape=False, order=0, mode=CONFIG.deskew.mode)
    profile = np.sum(turned, axis=1)
    # Difference between each profile entry and its predecessor.
    steps = np.diff(profile, axis=0)
    return np.sum(steps ** 2)
def find_best_angle(page):
    """Return the candidate angle with the highest ``find_score``.

    Candidates are generated with ``np.arange`` from
    ``-CONFIG.deskew.max_abs_angle`` up to (but excluding)
    ``CONFIG.deskew.max_abs_angle + CONFIG.deskew.delta`` in steps of
    ``CONFIG.deskew.delta``. On ties the first (smallest) candidate wins.

    Args:
        page: Array handed to ``find_score`` for every candidate angle.

    Returns:
        The element of the candidate array whose score is maximal.
    """
    max_angle = CONFIG.deskew.max_abs_angle
    step = CONFIG.deskew.delta
    candidates = np.arange(-max_angle, max_angle + step, step)
    scores = [find_score(page, candidate) for candidate in candidates]
    # max() keeps the first maximal pair, matching a "first index of max" lookup.
    winner, _ = max(zip(candidates, scores), key=lambda pair: pair[1])
    return winner
def preprocess(arr: np.array):
    """Reduce an image to a single channel and denoise it.

    Arrays with more than two dimensions are converted with
    ``cv2.COLOR_BGR2GRAY`` first; two-dimensional arrays are used as
    they are. The result is then passed through
    ``cv2.fastNlMeansDenoising`` with strength
    ``CONFIG.deskew.filter_strength_h``.

    Args:
        arr: Input image array.
            NOTE(review): the colour conversion assumes BGR channel
            order - confirm against callers.

    Returns:
        The denoised single-channel image.
    """
    has_channel_axis = arr.ndim > 2
    gray = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY) if has_channel_axis else arr
    return cv2.fastNlMeansDenoising(gray, h=CONFIG.deskew.filter_strength_h)
def rotate(page, angle):
    """Rotate ``page`` by ``angle`` without changing its shape.

    Thin wrapper around ``scipy.ndimage.rotate`` using order-0
    interpolation and the ``"nearest"`` boundary mode.

    Args:
        page: Array to rotate.
        angle: Rotation angle passed to ``scipy.ndimage.rotate``
            (degrees, per the SciPy documentation).

    Returns:
        The rotated array, with the same shape as ``page``.
    """
    options = {"reshape": False, "order": 0, "mode": "nearest"}
    return rotate_(page, angle, **options)
def deskew_histbased(page: np.array):
    """Deskew a page using the row-sum histogram score.

    The page is run through ``preprocess``, the best angle is obtained
    from ``find_best_angle`` and rounded to three decimals, and the
    preprocessed page is rotated by that angle. When
    ``CONFIG.deskew.verbose`` is truthy the angle is printed.

    Args:
        page: Input image array.

    Returns:
        A ``(rotated, best_angle)`` tuple.
        NOTE(review): ``rotated`` is derived from the preprocessed
        image, not from the original ``page`` - confirm this is intended.
    """
    cleaned = preprocess(page)
    best_angle = round(find_best_angle(cleaned), 3)
    if CONFIG.deskew.verbose:
        print("Skew angle from pixel histogram: {}".format(best_angle))
    return rotate(cleaned, best_angle), best_angle
def needs_deskew(page: np.array) -> bool:
    """Decide whether a page looks skewed enough to be worth deskewing.

    For an image, the "split row-mean difference" is computed: the
    per-row mean of the right half minus the per-row mean of the left
    half. The mean absolute value of that difference is the image's
    score. The score of the unrotated page is compared with the scores
    of the page rotated by ``-CONFIG.deskew.test_delta`` and
    ``+CONFIG.deskew.test_delta``.

    Args:
        page: Image array; it is split along axis 1 at the midpoint.

    Returns:
        ``True`` when the unrotated score is strictly greater than the
        smaller of the two rotated scores, i.e. when a small test
        rotation reduces the left/right imbalance; ``False`` otherwise.
    """

    def split_rowmean_diff(image):
        # Right-half row means minus left-half row means.
        half = image.shape[1] // 2
        return np.mean(image[:, half:], axis=1) - np.mean(image[:, :half], axis=1)

    def imbalance(image):
        # Mean absolute left/right row-mean difference of one image.
        return np.mean(np.abs(split_rowmean_diff(image)))

    unrotated_score = imbalance(page)
    test_delta = CONFIG.deskew.test_delta
    scores = [imbalance(rotate(page, angle)) for angle in (-test_delta, test_delta)]

    # Diagnostics are only emitted in verbose mode, matching deskew_histbased.
    if CONFIG.deskew.verbose:
        print(unrotated_score, scores)

    # Convert the NumPy boolean to a plain bool to honour the annotation.
    return bool(unrotated_score > min(scores))
# Select the module-level ``deskew`` callable according to
# CONFIG.deskew.function. Every variant returns a (page, angle) tuple.
if CONFIG.deskew.function == "hist":

    def deskew(page):
        """Deskew via the histogram method when needed, else return ``(page, 0)``."""
        return deskew_histbased(page) if needs_deskew(page) else (page, 0)

elif CONFIG.deskew.function == "identity":

    def deskew(page):
        """Return the page unchanged, with ``None`` as the angle."""
        return (page, None)

else:
    # The original message was missing its formatting step and printed the
    # literal placeholder instead of the offending configuration value.
    raise ValueError(
        "'{}' is not a valid parameter value for CONFIG.deskew.function".format(
            CONFIG.deskew.function
        )
    )