diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index 9fe5b46..8914b01 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -50,6 +50,20 @@ class ParsablePDFImageExtractor(ImageExtractor): yield from image_metadata_pairs + # def __preprocess(self, image_metadata_pair): + # image, metadata = image_metadata_pair + # + # try: + # image = self.__resize_and_convert(image) + # image_metadata_pair = ImageMetadataPair(image, metadata) + # except Exception as err: + # logger.warn( + # f"{err}: couldn't preprocess image [ page_idx: {metadata[Info.PAGE_IDX]}, x1: {metadata[Info.X1]}, y1: {metadata[Info.Y1]}, width: {metadata[Info.WIDTH]}, height: {metadata[Info.HEIGHT]} ]" + # ) + # image_metadata_pair = None + # + # return image_metadata_pair + def extract_pages(doc, page_range): page_range = range(page_range.start + 1, page_range.stop + 1) diff --git a/test/unit_tests/image_extractor_test.py b/test/unit_tests/image_extractor_test.py index e52b2b5..7ee2e05 100644 --- a/test/unit_tests/image_extractor_test.py +++ b/test/unit_tests/image_extractor_test.py @@ -5,7 +5,7 @@ import fitz import fpdf import pytest from PIL import Image -from funcy import first, rest +from funcy import first, rest, lmap from image_prediction.extraction import extract_images_from_pdf from image_prediction.image_extractor.extractor import ImageMetadataPair @@ -27,6 +27,7 @@ def test_image_extractor_mock(image_extractor, images): @pytest.mark.parametrize("alpha", [False, True]) def test_parsable_pdf_image_extractor(image_extractor, pdf, images, metadata, input_size, alpha): images_extracted, metadata_extracted = map(list, extract_images_from_pdf(pdf, image_extractor)) + if not alpha: assert image_sets_equal(images_extracted, images) assert metadata_equal(metadata_extracted, metadata)