"""Helpers that turn supported page inputs into denoised grayscale images."""

import cv2
from numpy import frombuffer, ndarray, uint8

# File extensions (compared case-insensitively) accepted for path inputs.
_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg")


def preprocess_page_array(page):
    """Convert a page image to grayscale if necessary and denoise it.

    Args:
        page: Image array. An array with more than two dimensions is
            converted with ``cv2.COLOR_BGR2GRAY``; any other array is passed
            to the denoiser unchanged.

    Returns:
        The output of ``cv2.fastNlMeansDenoising(page, h=3)``.
    """
    if len(page.shape) > 2:
        page = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
    return cv2.fastNlMeansDenoising(page, h=3)


def page2image(page):
    """Load a page from one of the supported input types and preprocess it.

    Args:
        page: One of
            * ``numpy.ndarray`` - used as the image directly,
            * ``bytes`` - contents of an encoded image (e.g. a PNG or JPEG
              file), decoded with ``cv2.imdecode``,
            * ``str`` - path to a ``.png``, ``.jpg`` or ``.jpeg`` file.

    Returns:
        The image after ``preprocess_page_array`` has been applied.

    Raises:
        IOError: If the path does not have a supported image extension, or
            the file could not be read as an image.
        ValueError: If the bytes could not be decoded as an image.
        TypeError: If ``page`` is of an unsupported type.
    """
    if isinstance(page, ndarray):
        image = page
    elif isinstance(page, bytes):
        # Encoded image data must be viewed as raw uint8 before decoding;
        # numpy's default float64 view would not be a decodable buffer.
        buffer = frombuffer(page, dtype=uint8)
        # cv2.imdecode rejects an empty buffer with a low-level error, so
        # treat empty input like any other undecodable payload.
        image = cv2.imdecode(buffer, cv2.IMREAD_COLOR) if buffer.size else None
        if image is None:
            raise ValueError("Could not decode the given bytes as an image.")
    elif isinstance(page, str):
        if not page.lower().endswith(_IMAGE_SUFFIXES):
            raise IOError(
                "PDFs are not a valid input type for cv-analysis."
                " Use PNGs for tests and NumPy arrays for deployment."
            )
        image = cv2.imread(page)
        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising; fail here with a clear message.
        if image is None:
            raise IOError("Could not read an image from {!r}.".format(page))
    else:
        raise TypeError("Incompatible datatype. Expected bytes, numpy.ndarray, or path to an image file.")
    return preprocess_page_array(image)