diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index 2c2f804..46f3f10 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -55,15 +55,12 @@ class ParsablePDFImageExtractor(ImageExtractor): maybe_image_metadata_pairs = map(partial(metadatum_to_image_metadata_pair, self.doc), metadata) image_metadata_pairs = [pair.value for pair in maybe_image_metadata_pairs if pair.is_just()] - print(image_metadata_pairs) clear_caches() # TODO: In the future, consider to introduce an image validator as a pipeline component rather than doing the # validation here. Invalid images can then be split into a different stream and joined with the intact images # again for the formatting step. image_metadata_pairs = self.__filter_valid_images(image_metadata_pairs) - image_metadata_pairs = list(image_metadata_pairs) - print(image_metadata_pairs) image_metadata_pairs = stitch_pairs(list(image_metadata_pairs), tolerance=self.tolerance) @@ -136,7 +133,8 @@ def xref_to_maybe_image(doc, xref) -> Maybe: def make_maybe_image_metadata_pair(image: Maybe, metadata: Maybe): - return Just(image.bind(curry(2, make_image_metadata_pair))).amap(metadata) + f = image.bind(lambda img: Just(lambda mdt: ImageMetadataPair(img, mdt))) + return f.amap(metadata) def make_image_metadata_pair(image: Image.Image, metadatum: dict) -> Just: diff --git a/test/unit_tests/image_extractor_test.py b/test/unit_tests/image_extractor_test.py index fa006e9..e0ac175 100644 --- a/test/unit_tests/image_extractor_test.py +++ b/test/unit_tests/image_extractor_test.py @@ -98,4 +98,4 @@ def test_bad_xref_handling(bad_xref_pdf, dvc_test_data): with pytest.raises(BadXref): extract_image(doc, xref) - assert xref_to_maybe_image(doc, xref) is None + assert xref_to_maybe_image(doc, xref).is_nothing()