From 436a32ad2b4ea213c0c0724e30ae75deef6ff3cf Mon Sep 17 00:00:00 2001 From: Julius Unverfehrt Date: Wed, 1 Feb 2023 15:07:35 +0100 Subject: [PATCH] refactor scanned page filtering WIP --- image_prediction/image_extractor/extractors/parsable.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index 5010c25..f6bbc82 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -50,8 +50,11 @@ class ParsablePDFImageExtractor(ImageExtractor): metadata = filter_metadata_for_scanned_pages(metadata) metadata = list(filter_out_tiny_images(metadata)) metadata = list(filter_invalid_metadata(metadata)) + metadata = add_alpha_channel_info(self.doc, page, metadata) + images = get_images_on_page(self.doc, metadata) + clear_caches() image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata))) @@ -173,7 +176,7 @@ def add_alpha_channel_info(doc, page, metadata): alpha_to_dict = compose(dict, lambda a: [(Info.ALPHA, a)]) page_to_alpha_mapping_per_image = compose(lift(alpha_to_dict), page_to_alpha_value_per_image) - metadata = starmap(merge, zip(page_to_alpha_mapping_per_image(page), metadata)) + metadata = starmap(merge, zip(metadata, page_to_alpha_mapping_per_image(page))) yield from metadata