change default deskew function from hough-line-based to pixel-histogram-based; use scipy.ndimage.rotate

This commit is contained in:
Isaac Riley 2022-02-22 10:18:41 +01:00
parent 59e082379c
commit 8ff5147ee4
5 changed files with 40 additions and 12 deletions

View File

@ -6,4 +6,4 @@ imutils==0.5.4
iteration-utilities==0.11.0
dvc==2.9.3
dvc[ssh]
scipy~=1.8.0

View File

@ -4,7 +4,7 @@ import numpy as np
import pdf2image
from vidocp.utils.display import show_mpl
from vidocp.utils.deskew import deskew_image
from vidocp.utils.deskew import deskew_histbased, deskew_linebased
def parse_args():
@ -23,5 +23,7 @@ if __name__ == "__main__":
page = np.array(page)
show_mpl(page)
page = deskew_image(page, verbose=True)
show_mpl(page)
page_ = deskew_linebased(page, verbose=True)
show_mpl(page_)
page_ = deskew_histbased(page, verbose=True)
show_mpl(page_)

View File

@ -4,6 +4,7 @@ from pdf2image import pdf2image
from vidocp.utils.display import show_mpl
from vidocp.utils.draw import draw_stats
from vidocp.utils.deskew import deskew_histbased
def add_external_contours(image, img):
@ -33,7 +34,7 @@ def isolate_vertical_and_horizontal_components(img_bin):
def parse_table(image: np.array):
gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape)>2 else image
th1, img_bin = cv2.threshold(gray_scale, 150, 255, cv2.THRESH_BINARY)
img_bin = ~img_bin
@ -45,10 +46,12 @@ def parse_table(image: np.array):
return stats
def annotate_tables_in_pdf(pdf_path, page_index=1):
def annotate_tables_in_pdf(pdf_path, page_index=1, deskew=True):
page = pdf2image.convert_from_path(pdf_path, first_page=page_index + 1, last_page=page_index + 1)[0]
page = np.array(page)
if deskew:
page = deskew_histbased(page)
stats = parse_table(page)
page = draw_stats(page, stats)

View File

@ -1,8 +1,9 @@
import numpy as np
from scipy.ndimage import rotate
import cv2
def detect_angle(im: np.array, max_skew_deg=10, min_skew_deg=0.1, min_nlines=5) -> int:
def detect_angle_from_lines(im: np.array, max_skew_deg=10, min_skew_deg=0.1, min_nlines=5) -> int:
max_skew_rad = np.deg2rad(max_skew_deg)
min_skew_rad = np.deg2rad(min_skew_deg)
width = im.shape[1]
@ -41,11 +42,35 @@ def rotate_straight(im: np.array, skew_angle: int) -> np.array:
return rotated
def deskew_image(image: np.array, verbose=False) -> np.array:
skew_angle = detect_angle(image)
def deskew_linebased(image: np.array, verbose=False) -> np.array:
    """Straighten ``image`` using the skew angle estimated from detected lines.

    The angle comes from ``detect_angle_from_lines``. When that angle is falsy
    (for example zero), the input is returned as-is; otherwise the result of
    ``rotate_straight`` is returned.

    Args:
        image: Image array to deskew.
        verbose: If true, print the estimated skew angle.

    Returns:
        The corrected image, or the original ``image`` object when no
        correction is applied.
    """
    angle = detect_angle_from_lines(image)
    if verbose:
        print(f"Skew angle: {angle}")
    # Guard clause: nothing to correct, hand back the input untouched.
    if not angle:
        return image
    return rotate_straight(image, angle)
def _row_profile_score(arr, angle):
    """Score how sharply the row sums of ``arr`` change after rotating by ``angle`` degrees."""
    turned = rotate(arr, angle, reshape=False, order=0)
    # Accumulate in a signed 64-bit type: summing 8-bit pixels otherwise yields
    # an unsigned result, and differencing unsigned values wraps around instead
    # of going negative (and can overflow where the platform integer is 32-bit).
    profile = turned.sum(axis=1, dtype=np.int64)
    return np.sum(np.diff(profile) ** 2)


def deskew_histbased(page: np.array, max_abs_angle=1.5, delta=0.15, mode="nearest", verbose=False):
    """Deskew a page by maximising the contrast of its row-sum histogram.

    The page is converted to grayscale (if it has a channel axis) and
    denoised. Every candidate angle in ``[-max_abs_angle, max_abs_angle]``
    (step ``delta``) is then tried, and the angle whose rotated image has the
    largest sum of squared differences between consecutive row sums is used
    for the final rotation.

    Args:
        page: Image array, either 2-D grayscale or with a trailing channel
            axis. NOTE(review): the colour conversion uses ``COLOR_BGR2GRAY``,
            i.e. it assumes BGR channel order -- confirm against callers.
        max_abs_angle: Largest absolute rotation, in degrees, to try.
        delta: Step between candidate angles, in degrees. Must be positive.
        mode: Boundary mode passed to ``scipy.ndimage.rotate`` for the final
            rotation only; the scoring rotations use that function's default.
        verbose: If true, print the selected angle.

    Returns:
        The denoised grayscale page rotated by the best-scoring angle, with
        the same height and width as the input (``reshape=False``).

    Raises:
        ValueError: If ``delta`` is not positive or ``max_abs_angle`` is
            negative.
    """
    if delta <= 0 or max_abs_angle < 0:
        raise ValueError("delta must be positive and max_abs_angle must be non-negative")

    # Only convert when a channel axis is present; cvtColor rejects input that
    # is already single-channel.
    if page.ndim > 2:
        page = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
    page = cv2.fastNlMeansDenoising(page, h=3)

    angles = np.arange(-max_abs_angle, max_abs_angle + delta, delta)
    # With a float step, arange may emit one extra value past the intended
    # end point; discard anything beyond max_abs_angle (half-step tolerance).
    angles = angles[angles <= max_abs_angle + delta / 2]

    scores = [_row_profile_score(page, angle) for angle in angles]
    # argmax returns the first maximum, matching list.index(max(...)).
    best_angle = angles[int(np.argmax(scores))]
    if verbose:
        print(f"Best angle: {best_angle}")

    return rotate(page, best_angle, reshape=False, order=0, mode=mode)

View File

@ -3,14 +3,12 @@ from matplotlib import pyplot as plt
def show_mpl(image):
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(20, 20)
ax.imshow(image)
ax.imshow(image, cmap="gray")
plt.show()
def show_cv2(image):
cv2.imshow("", image)
cv2.waitKey(0)