30 lines
841 B
Python

from numpy import frombuffer, ndarray
import cv2
def preprocess_page_array(page):
    """Return a grayscale, denoised version of a page image array.

    Arrays with more than two dimensions are converted with
    ``cv2.COLOR_BGR2GRAY``; arrays with two or fewer dimensions skip the
    conversion. The result is then passed through
    ``cv2.fastNlMeansDenoising`` with ``h=3``.

    Args:
        page: Image array exposing a ``shape`` attribute.
            NOTE(review): presumably a uint8 BGR or single-channel image,
            as required by the OpenCV calls -- confirm against callers.

    Returns:
        The denoised array produced by ``cv2.fastNlMeansDenoising``.
    """
    has_channel_axis = len(page.shape) > 2
    grayscale = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY) if has_channel_axis else page
    return cv2.fastNlMeansDenoising(grayscale, h=3)
def page2image(page):
    """Load a page from bytes, an array, or an image path and preprocess it.

    Args:
        page: One of
            * ``bytes`` holding an encoded image (e.g. the content of a
              PNG or JPEG file), decoded with ``cv2.imdecode``;
            * a ``numpy.ndarray``, used as-is;
            * a ``str`` path ending in ``.png``, ``.jpg`` or ``.jpeg``
              (case-insensitive), read with ``cv2.imread``.

    Returns:
        The result of ``preprocess_page_array`` applied to the loaded image.

    Raises:
        IOError: If a string path does not have a supported image
            extension, or if the bytes/file cannot be decoded as an image.
        TypeError: If ``page`` is not bytes, ``numpy.ndarray`` or ``str``.
    """
    if isinstance(page, bytes):
        if not page:
            raise IOError("Cannot decode an image from empty bytes.")
        # frombuffer defaults to float64, which misinterprets image bytes;
        # expose them as uint8 and let OpenCV decode the encoded image.
        encoded = frombuffer(page, dtype="uint8")
        # IMREAD_COLOR matches the default flag cv2.imread uses below.
        page = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
        if page is None:
            raise IOError("Could not decode an image from the given bytes.")
    elif isinstance(page, ndarray):
        pass  # already an image array; nothing to load
    elif isinstance(page, str):
        if not page.lower().endswith((".png", ".jpg", ".jpeg")):
            raise IOError(
                "PDFs are not a valid input type for cv-analysis."
                " Use PNGs for tests and NumPy arrays for deployment."
            )
        path = page
        page = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        if page is None:
            raise IOError(f"Could not read image file: {path}")
    else:
        raise TypeError("Incompatible datatype. Expected bytes, numpy.ndarray, or path to an image file.")
    return preprocess_page_array(page)