refactor scanned page filtering WIP

This commit is contained in:
Julius Unverfehrt 2023-02-01 15:07:35 +01:00
parent 9ec6cc19ba
commit 436a32ad2b

View File

@@ -50,8 +50,11 @@ class ParsablePDFImageExtractor(ImageExtractor):
metadata = filter_metadata_for_scanned_pages(metadata)
metadata = list(filter_out_tiny_images(metadata))
metadata = list(filter_invalid_metadata(metadata))
metadata = add_alpha_channel_info(self.doc, page, metadata)
images = get_images_on_page(self.doc, metadata)
clear_caches()
image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata)))
@@ -173,7 +176,7 @@ def add_alpha_channel_info(doc, page, metadata):
alpha_to_dict = compose(dict, lambda a: [(Info.ALPHA, a)])
page_to_alpha_mapping_per_image = compose(lift(alpha_to_dict), page_to_alpha_value_per_image)
metadata = starmap(merge, zip(page_to_alpha_mapping_per_image(page), metadata))
metadata = starmap(merge, zip(metadata, page_to_alpha_mapping_per_image(page)))
yield from metadata