Matthias Bisping 94e9210faf Refactoring
Various
2023-01-09 11:21:43 +01:00

30 lines
1.1 KiB
Python

from numpy import array, ndarray
import pdf2image
from PIL import Image
from cv_analysis.utils.preprocessing import preprocess_page_array
def open_analysis_input_file(path_or_bytes, first_page=1, last_page=None):
    """Load the input for an analysis run as a list of preprocessed pages.

    The handling depends on the type of ``path_or_bytes``:

    * ``str`` -- a file path. ``.png``/``.jpg``/``.jpeg`` files are opened
      as a single page with PIL (the page range is ignored for images);
      ``.pdf`` files are rendered with ``pdf2image.convert_from_path``.
      The extension check is case-insensitive.
    * ``bytes`` -- handed to ``pdf2image.convert_from_bytes``.
    * ``list`` or ``numpy.ndarray`` -- assumed to be already loaded and
      returned unchanged, i.e. *without* being preprocessed.

    Args:
        path_or_bytes: File path, raw bytes, or already loaded page data.
        first_page: 1-based number of the first page to convert.
        last_page: 1-based number of the last page to convert (inclusive).
            When omitted, only ``first_page`` is converted.

    Returns:
        A list holding the result of ``preprocess_page_array`` for every
        loaded page, or ``path_or_bytes`` itself for list/ndarray input.

    Raises:
        AssertionError: If ``first_page`` is not positive or ``last_page``
            is smaller than ``first_page``.
        IOError: If a path does not end in a supported file extension.
        TypeError: If ``path_or_bytes`` is of an unsupported type.
    """
    # NOTE(review): these checks are skipped when Python runs with -O. They are
    # kept as asserts so callers relying on AssertionError keep working.
    assert first_page > 0, "Page numbers are 1-based."
    assert last_page is None or last_page >= first_page, "last_page must be greater than or equal to first_page."

    # Already loaded input is passed through untouched.
    if isinstance(path_or_bytes, (list, ndarray)):
        return path_or_bytes

    # Without an explicit end of the range only a single page is converted.
    last_page = last_page or first_page

    if isinstance(path_or_bytes, str):
        lowered_path = path_or_bytes.lower()
        if lowered_path.endswith((".png", ".jpg", ".jpeg")):
            pages = [Image.open(path_or_bytes)]
        elif lowered_path.endswith(".pdf"):
            pages = pdf2image.convert_from_path(path_or_bytes, first_page=first_page, last_page=last_page)
        else:
            raise IOError("Invalid file extension. Accepted filetypes: .png, .jpg, .jpeg, .pdf")
    elif isinstance(path_or_bytes, bytes):
        pages = pdf2image.convert_from_bytes(path_or_bytes, first_page=first_page, last_page=last_page)
    else:
        # Previously this case fell through to an UnboundLocalError on `pages`.
        raise TypeError(
            f"Unsupported input type {type(path_or_bytes).__name__!r}. "
            "Accepted types: str, bytes, list, numpy.ndarray"
        )

    return [preprocess_page_array(array(page)) for page in pages]