@lru_cache(maxsize=None)
def xref_to_image(doc, xref) -> Image:
    """Decode the image object ``xref`` of ``doc`` into a PIL image.

    The pixmap's channel count (``pixmap.n``) determines the array layout
    explicitly instead of guessing it by trial-and-error reshaping.

    Args:
        doc: The opened PyMuPDF document that contains the image.
        xref: Cross-reference number of the image object inside ``doc``.

    Returns:
        A PIL image built from the pixmap's samples. Single-channel pixmaps
        yield a 2-D (height, width) array; multi-channel pixmaps yield a
        (height, width, channels) array.

    Note:
        Results are memoized without bound per ``(doc, xref)`` pair, so
        ``doc`` must be hashable and stays referenced by the cache.
    """
    pixmap = fitz.Pixmap(doc, xref)

    # CMYK data (4 colour components, optionally plus alpha) cannot be handed
    # to PIL as-is: 4 channels would be misread as RGBA and 5 channels are
    # rejected outright. Convert to RGB first; an alpha channel is preserved.
    if pixmap.n - pixmap.alpha >= 4:
        pixmap = fitz.Pixmap(fitz.csRGB, pixmap)

    # PIL expects grayscale data as a 2-D array, everything else as
    # (height, width, channels).
    if pixmap.n == 1:
        shape = (pixmap.h, pixmap.w)
    else:
        shape = (pixmap.h, pixmap.w, pixmap.n)

    # assumes samples are tightly packed 8-bit values (stride == w * n), as
    # the original reshape also did
    array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(shape)
    return Image.fromarray(array)