Patch notes (commentary only; this text sits ahead of the first "diff --git" header).

  * utils/generic.py: adds `bottom`, a helper that accepts any arguments and returns None.
  * extractors/parsable.py, `right`: passes `bottom` as the handler for the Either side it
    does not read, and drops the "reason: " prefix from the warning in favour of a line
    break followed by the Left payload.
  * extractors/parsable.py, `make_maybe_image_metadata_pair`: when the combined Either is a
    Left, re-wraps it as Left({"reason": ..., "metadata": ...}), replacing the old TODO.

  NOTE(review): `metadata.value` is read whenever the combined result is a Left, which
  presumably includes the case where `metadata` itself is the Left -- confirm that
  EnumFormatter copes with whatever `.value` holds then.
  NOTE(review): `none` is newly imported from funcy but is not used in any hunk shown
  here -- confirm it is needed.
  NOTE(review): line breaks and leading whitespace inside the hunks were reconstructed
  from the hunk header counts and Python syntax (`right` is assumed to be top-level);
  verify against the repository before applying.

diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py
index 784a54f..39271c9 100644
--- a/image_prediction/image_extractor/extractors/parsable.py
+++ b/image_prediction/image_extractor/extractors/parsable.py
@@ -8,16 +8,18 @@ from typing import List, Union
 import fitz
 import numpy as np
 from PIL import Image
-from funcy import merge, compose, rcompose, keep, lfilter
+from funcy import merge, compose, rcompose, keep, lfilter, none
 from pymonad.either import Right, Left, Either
 from pymonad.tools import curry, identity
 
 from image_prediction.exceptions import InvalidBox, BadXref
+from image_prediction.formatter.formatters.enum import EnumFormatter
 from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
 from image_prediction.info import Info
 from image_prediction.stitching.stitching import stitch_pairs
 from image_prediction.stitching.utils import validate_box
 from image_prediction.utils import get_logger
+from image_prediction.utils.generic import bottom
 
 logger = get_logger()
 
@@ -58,8 +60,8 @@ class ParsablePDFImageExtractor(ImageExtractor):
 
 def right(pair: Either):
     if pair.is_right():
-        return pair.either(identity, identity)
-    logger.warning(f"Skipping bad image. reason: {pair.either(identity, identity)}")
+        return pair.either(bottom, identity)
+    logger.warning(f"Skipping bad image. \n{pair.either(identity, bottom)}")
 
 
 def extract_pages(doc, page_range):
@@ -127,8 +129,12 @@ def xref_to_maybe_image(doc, xref) -> Either:
 def make_maybe_image_metadata_pair(image: Either, metadata: Either):
     # haskell.org/tutorial/monads.html
     # (>>) :: m a -> m b -> m b
-    return Right(make_image_metadata_pair).amap(image).amap(metadata)
-    # TODO: Somehow metadata needs to be added to Lefts for logging the reference to the invalid image
+    either = Right(make_image_metadata_pair).amap(image).amap(metadata)
+    return (
+        Left({"reason": either.either(identity, bottom), "metadata": EnumFormatter()(metadata.value)})
+        if either.is_left()
+        else either
+    )
 
 
 @curry(2)
diff --git a/image_prediction/utils/generic.py b/image_prediction/utils/generic.py
index de71a5c..ffe835e 100644
--- a/image_prediction/utils/generic.py
+++ b/image_prediction/utils/generic.py
@@ -13,3 +13,7 @@ def lift(fn):
 
 def starlift(fn):
     return curry(starmap)(fn)
+
+
+def bottom(*args, **kwargs):
+    return None