diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py
index b65e3d3..941f431 100644
--- a/image_prediction/image_extractor/extractors/parsable.py
+++ b/image_prediction/image_extractor/extractors/parsable.py
@@ -136,7 +136,11 @@ def get_image_infos(page: fitz.Page) -> List[dict]:
 @lru_cache(maxsize=None)
 def xref_to_image(doc, xref) -> Image:
+    """Build a PIL image from the pixmap that `xref` refers to in `doc`."""
     pixmap = fitz.Pixmap(doc, xref)
-    array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(pixmap.h, pixmap.w)
+    # Take the channel count from pixmap.n instead of guessing it through a failed
+    # reshape: one channel gives a 2-D array, more keep a trailing channel axis.
+    shape = (pixmap.h, pixmap.w) if pixmap.n == 1 else (pixmap.h, pixmap.w, pixmap.n)
+    array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(shape)
     return Image.fromarray(array)