30 lines
841 B
Python
30 lines
841 B
Python
from numpy import frombuffer, ndarray
|
|
import cv2
|
|
|
|
|
|
def preprocess_page_array(page):
    """Return a grayscale, denoised version of a page array.

    Args:
        page: Array-like object exposing ``shape``. When it has more than
            two dimensions it is converted with ``cv2.COLOR_BGR2GRAY``
            (i.e. it is treated as BGR -- TODO confirm callers never pass
            RGB data).

    Returns:
        The output of ``cv2.fastNlMeansDenoising`` (``h=3``) applied to the
        single-channel page.
    """
    has_channel_axis = len(page.shape) > 2
    # Arrays without a channel axis are passed to the denoiser unchanged.
    grayscale = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY) if has_channel_axis else page
    return cv2.fastNlMeansDenoising(grayscale, h=3)
|
|
|
|
|
|
def page2image(page):
    """Load a page from bytes, a NumPy array or an image path and preprocess it.

    Args:
        page: One of

            * ``bytes`` -- content of an encoded image (e.g. a PNG or JPEG
              file), decoded with ``cv2.imdecode``;
            * ``numpy.ndarray`` -- used as is;
            * ``str`` -- path to a ``.png``/``.jpg``/``.jpeg`` file, read
              with ``cv2.imread``.

    Returns:
        The result of ``preprocess_page_array`` for the loaded image.

    Raises:
        TypeError: If ``page`` is not bytes, an ndarray or a string.
        IOError: If the path does not have a supported image extension or
            the file cannot be read.
        ValueError: If the bytes are empty or cannot be decoded as an image.
    """
    if isinstance(page, bytes):
        if not page:
            raise ValueError("Cannot decode an image from empty bytes.")
        # frombuffer defaults to float64, which misreads image bytes; view the
        # buffer as uint8 and let OpenCV decode the encoded image.
        encoded = frombuffer(page, dtype="uint8")
        image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
        if image is None:
            raise ValueError("Could not decode the given bytes as an image.")
    elif isinstance(page, ndarray):
        image = page
    elif isinstance(page, str):
        if not page.lower().endswith((".png", ".jpg", ".jpeg")):
            raise IOError(
                "PDFs are not a valid input type for cv-analysis."
                " Use PNGs for tests and NumPy arrays for deployment."
            )
        image = cv2.imread(page)
        # cv2.imread signals a missing or unreadable file by returning None.
        if image is None:
            raise IOError(f"Could not read image file: {page}")
    else:
        raise TypeError("Incompatible datatype. Expected bytes, numpy.ndarray, or path to an image file.")

    return preprocess_page_array(image)
|