[WIP] Monadic refactoring
This commit is contained in:
parent
4e3168e51c
commit
906fee0e5d
@ -55,15 +55,12 @@ class ParsablePDFImageExtractor(ImageExtractor):
|
|||||||
|
|
||||||
maybe_image_metadata_pairs = map(partial(metadatum_to_image_metadata_pair, self.doc), metadata)
|
maybe_image_metadata_pairs = map(partial(metadatum_to_image_metadata_pair, self.doc), metadata)
|
||||||
image_metadata_pairs = [pair.value for pair in maybe_image_metadata_pairs if pair.is_just()]
|
image_metadata_pairs = [pair.value for pair in maybe_image_metadata_pairs if pair.is_just()]
|
||||||
print(image_metadata_pairs)
|
|
||||||
clear_caches()
|
clear_caches()
|
||||||
|
|
||||||
# TODO: In the future, consider introducing an image validator as a pipeline component rather than doing the
|
# TODO: In the future, consider introducing an image validator as a pipeline component rather than doing the
|
||||||
# validation here. Invalid images can then be split into a different stream and joined with the intact images
|
# validation here. Invalid images can then be split into a different stream and joined with the intact images
|
||||||
# again for the formatting step.
|
# again for the formatting step.
|
||||||
image_metadata_pairs = self.__filter_valid_images(image_metadata_pairs)
|
image_metadata_pairs = self.__filter_valid_images(image_metadata_pairs)
|
||||||
image_metadata_pairs = list(image_metadata_pairs)
|
|
||||||
print(image_metadata_pairs)
|
|
||||||
|
|
||||||
image_metadata_pairs = stitch_pairs(list(image_metadata_pairs), tolerance=self.tolerance)
|
image_metadata_pairs = stitch_pairs(list(image_metadata_pairs), tolerance=self.tolerance)
|
||||||
|
|
||||||
@ -136,7 +133,8 @@ def xref_to_maybe_image(doc, xref) -> Maybe:
|
|||||||
|
|
||||||
|
|
||||||
def make_maybe_image_metadata_pair(image: Maybe, metadata: Maybe):
|
def make_maybe_image_metadata_pair(image: Maybe, metadata: Maybe):
|
||||||
return Just(image.bind(curry(2, make_image_metadata_pair))).amap(metadata)
|
f = image.bind(lambda img: Just(lambda mdt: ImageMetadataPair(img, mdt)))
|
||||||
|
return f.amap(metadata)
|
||||||
|
|
||||||
|
|
||||||
def make_image_metadata_pair(image: Image.Image, metadatum: dict) -> Just:
|
def make_image_metadata_pair(image: Image.Image, metadatum: dict) -> Just:
|
||||||
|
|||||||
@ -98,4 +98,4 @@ def test_bad_xref_handling(bad_xref_pdf, dvc_test_data):
|
|||||||
with pytest.raises(BadXref):
|
with pytest.raises(BadXref):
|
||||||
extract_image(doc, xref)
|
extract_image(doc, xref)
|
||||||
|
|
||||||
assert xref_to_maybe_image(doc, xref) is None
|
assert xref_to_maybe_image(doc, xref).is_nothing()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user