Refactor scanned-page filtering (WIP)
This commit is contained in:
parent
9ec6cc19ba
commit
436a32ad2b
@@ -50,8 +50,11 @@ class ParsablePDFImageExtractor(ImageExtractor):
|
|||||||
metadata = filter_metadata_for_scanned_pages(metadata)
|
metadata = filter_metadata_for_scanned_pages(metadata)
|
||||||
metadata = list(filter_out_tiny_images(metadata))
|
metadata = list(filter_out_tiny_images(metadata))
|
||||||
metadata = list(filter_invalid_metadata(metadata))
|
metadata = list(filter_invalid_metadata(metadata))
|
||||||
|
|
||||||
metadata = add_alpha_channel_info(self.doc, page, metadata)
|
metadata = add_alpha_channel_info(self.doc, page, metadata)
|
||||||
|
|
||||||
images = get_images_on_page(self.doc, metadata)
|
images = get_images_on_page(self.doc, metadata)
|
||||||
|
|
||||||
clear_caches()
|
clear_caches()
|
||||||
|
|
||||||
image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata)))
|
image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata)))
|
||||||
@@ -173,7 +176,7 @@ def add_alpha_channel_info(doc, page, metadata):
|
|||||||
alpha_to_dict = compose(dict, lambda a: [(Info.ALPHA, a)])
|
alpha_to_dict = compose(dict, lambda a: [(Info.ALPHA, a)])
|
||||||
page_to_alpha_mapping_per_image = compose(lift(alpha_to_dict), page_to_alpha_value_per_image)
|
page_to_alpha_mapping_per_image = compose(lift(alpha_to_dict), page_to_alpha_value_per_image)
|
||||||
|
|
||||||
metadata = starmap(merge, zip(page_to_alpha_mapping_per_image(page), metadata))
|
metadata = starmap(merge, zip(metadata, page_to_alpha_mapping_per_image(page)))
|
||||||
|
|
||||||
yield from metadata
|
yield from metadata
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user