From c03913e088c227dced6454e972ede2c55c358fcc Mon Sep 17 00:00:00 2001 From: Julius Unverfehrt Date: Mon, 29 Aug 2022 13:01:42 +0200 Subject: [PATCH] Pull request #26: RED-5107: move image normalization for predictor to image extraction step to be able to properly catch exceptions thrown from this step Merge in RR/image-prediction from RED-5107-hotfix to release/3.4.1 Squashed commit of the following: commit b7b99074054e67201537efc2f0a5b96f29bd1684 Author: Julius Unverfehrt Date: Mon Aug 29 12:57:50 2022 +0200 RED-5107: move image normalization for predictor to image extraction step to be able to properly catch exceptions thrown from this step --- .../image_extractor/extractors/parsable.py | 22 ++++++++++++++++++- .../redai_adapter/model_wrapper.py | 4 ---- test/unit_tests/image_extractor_test.py | 9 +++++++- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/image_prediction/image_extractor/extractors/parsable.py b/image_prediction/image_extractor/extractors/parsable.py index 9fe5b46..ff58710 100644 --- a/image_prediction/image_extractor/extractors/parsable.py +++ b/image_prediction/image_extractor/extractors/parsable.py @@ -2,7 +2,7 @@ import atexit import io from functools import partial, lru_cache from itertools import chain, starmap, filterfalse -from operator import itemgetter +from operator import itemgetter, truth from typing import List import fitz @@ -30,6 +30,8 @@ class ParsablePDFImageExtractor(ImageExtractor): self.doc: fitz.fitz.Document = None self.verbose = verbose self.tolerance = tolerance + # TODO: Move assignment of input shape for predictor, should not be set here since dependent on predictor + self.input_shape = (224, 224, 3) def extract(self, pdf: bytes, page_range: range = None): self.doc = fitz.Document(stream=pdf) @@ -47,9 +49,27 @@ class ParsablePDFImageExtractor(ImageExtractor): image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata))) image_metadata_pairs = stitch_pairs(list(image_metadata_pairs), 
tolerance=self.tolerance) + image_metadata_pairs = filter(truth, map(self.__preprocess, image_metadata_pairs)) yield from image_metadata_pairs + def __preprocess(self, image_metadata_pair): + image, metadata = image_metadata_pair + + try: + image = self.__resize_and_convert(image) + image_metadata_pair = ImageMetadataPair(image, metadata) + except Exception as err: + logger.warn( + f"{err}: couldn't preprocess image [ page_idx: {metadata[Info.PAGE_IDX]}, x1: {metadata[Info.X1]}, y1: {metadata[Info.Y1]}, width: {metadata[Info.WIDTH]}, height: {metadata[Info.HEIGHT]} ]" + ) + image_metadata_pair = None + + return image_metadata_pair + + def __resize_and_convert(self, image): + return image.resize(self.input_shape[:-1]).convert("RGB") + def extract_pages(doc, page_range): page_range = range(page_range.start + 1, page_range.stop + 1) diff --git a/image_prediction/redai_adapter/model_wrapper.py b/image_prediction/redai_adapter/model_wrapper.py index 776931e..2e35c1a 100644 --- a/image_prediction/redai_adapter/model_wrapper.py +++ b/image_prediction/redai_adapter/model_wrapper.py @@ -27,11 +27,7 @@ class ModelWrapper(abc.ABC): def __images_to_tensor(images): return np.array(list(map(tf.keras.preprocessing.image.img_to_array, images))) - def __resize_and_convert(self, image): - return image.resize(self.input_shape[:-1]).convert("RGB") - def prep_images(self, images): - images = map(self.__resize_and_convert, images) tensor = self.__images_to_tensor(images) tensor = self.__preprocess_tensor(tensor) diff --git a/test/unit_tests/image_extractor_test.py b/test/unit_tests/image_extractor_test.py index e52b2b5..bf9dfd0 100644 --- a/test/unit_tests/image_extractor_test.py +++ b/test/unit_tests/image_extractor_test.py @@ -5,7 +5,7 @@ import fitz import fpdf import pytest from PIL import Image -from funcy import first, rest +from funcy import first, rest, lmap from image_prediction.extraction import extract_images_from_pdf from image_prediction.image_extractor.extractor import 
ImageMetadataPair @@ -27,6 +27,13 @@ def test_image_extractor_mock(image_extractor, images): @pytest.mark.parametrize("alpha", [False, True]) def test_parsable_pdf_image_extractor(image_extractor, pdf, images, metadata, input_size, alpha): images_extracted, metadata_extracted = map(list, extract_images_from_pdf(pdf, image_extractor)) + + # TODO: move resize operation to expected images fixture + def __resize_and_convert(image): + return image.resize((224, 224)).convert("RGB") + + images = lmap(__resize_and_convert, images) + if not alpha: assert image_sets_equal(images_extracted, images) assert metadata_equal(metadata_extracted, metadata)