diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index a022396..9fe5b46 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -13,8 +13,11 @@ from image_prediction.image_extractor.extractor import ImageExtractor, ImageMeta from image_prediction.info import Info from image_prediction.stitching.stitching import stitch_pairs from image_prediction.stitching.utils import validate_box_coords, validate_box_size +from image_prediction.utils import get_logger from image_prediction.utils.generic import lift +logger = get_logger() + class ParsablePDFImageExtractor(ImageExtractor): def __init__(self, verbose=False, tolerance=0): @@ -162,7 +165,11 @@ def has_alpha_channel(doc, xref): if maybe_smask: return any([doc.extract_image(maybe_smask) is not None, bool(fitz.Pixmap(doc, maybe_smask).alpha)]) else: - return bool(fitz.Pixmap(doc, xref).alpha) + try: + return bool(fitz.Pixmap(doc, xref).alpha) + except ValueError: + logger.debug(f"Encountered invalid xref `{xref}` in {doc.metadata.get('title', '')}.") + return False def tiny(metadata):